New Algorithm, initial policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, trial: 0, score: 4974 trial: 1, score: 4991 trial: 2, score: 6909 trial: 3, score: 5405 trial: 4, score: 4479 Policy 0: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Average Score: 5351.6 trial: 0, score: 3485 trial: 1, score: 3677 trial: 2, score: 4413 trial: 3, score: 2429 trial: 4, score: 4799 Policy 1: 25.2331, -44.7286, -6.54229, -5.81092, -48.3555, 84.083, 8.44992, 47.5492, -51.9208, -3.88481, 81.9735, 24.34, -15.9574, -88, -0.219261, -9.18, 25.1139, 0.627963, 36.5379, -64.5657, 3.93216, 122, 21.8791, -13.8453, -85.2296, -10.6152, -14.0482, -3.78722, -1.96557, -2.40602, -37.2908, -7.53491, 106.666, 17.6364, -29.9938, 55.5776, -5.28313, -20.8864, 68.5802, 4.11423, -16.8528, 59.9919, 2.19458, 122, 0, -26.0885, 49.6617, -10.6342, -11.5077, 114.438, -4.24404, -5.86376, 44.4938, 13.4354, 102.071, 41.5167, -10.2035, 15.1251, -8.86677, -20.4988, -19.0263, 1.24808, 119.669, -63.8888, -9.57239, 106.263, Average Score: 3760.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 5351.6 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, trial: 0, score: 4550 trial: 1, score: 19517 trial: 2, score: 5566 trial: 3, score: 4183 trial: 4, score: 4157 Policy 1: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Average Score: 7594.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.8615, -48.6831, -3.65467, -1.25644, -51.4861, 87.0509, 8.90019, 56.7099, -47.0706, -10, 84.4208, 24.5471, -26.1059, -83.0144, -10.1323, -10.7432, 21.6331, 5.2089, 33.008, -58.189, 4.24963, 121.462, 25.36, -6.39297, -82.7439, -7.17193, -9.17102, -0.453552, 2.77124, -10.3934, -39.4659, -5.17563, 104.95, 12.9164, -28.2506, 58.9078, -7.04423, -23.472, 76.0039, -0.597631, -8.87142, 58.5974, 0.989857, 120.794, 7.61793, -34.533, 50.1448, -8.24431, -15.074, 108.968, -8.40303, -1.36002, 46.1803, 11.7554, 103.487, 39.3452, -15.4298, 10.5136, -13.9308, -25.2544, -17.7423, 1.04457, 122, -67.6986, -10, 105.536, trial: 0, score: 1606 trial: 1, score: 1725 trial: 2, score: 1565 trial: 3, score: 1884 trial: 4, score: 1951 Policy 1: 23.8615, -48.6831, -3.65467, -1.25644, -51.4861, 87.0509, 8.90019, 56.7099, -47.0706, -10, 84.4208, 24.5471, -26.1059, -83.0144, -10.1323, -10.7432, 21.6331, 5.2089, 33.008, -58.189, 4.24963, 121.462, 25.36, -6.39297, -82.7439, -7.17193, -9.17102, -0.453552, 2.77124, -10.3934, -39.4659, -5.17563, 104.95, 12.9164, -28.2506, 58.9078, -7.04423, -23.472, 76.0039, -0.597631, -8.87142, 58.5974, 0.989857, 120.794, 7.61793, -34.533, 50.1448, -8.24431, -15.074, 108.968, -8.40303, -1.36002, 46.1803, 11.7554, 103.487, 39.3452, -15.4298, 10.5136, -13.9308, -25.2544, -17.7423, 1.04457, 122, -67.6986, -10, 105.536, Average Score: 1746.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.1777, -55.2407, -3.39561, -0.642344, -51.3533, 88.4035, 18.2073, 53.4173, -52.5784, -5.74562, 83.4209, 31.7312, -22.868, -82.606, -4.82997, -10.8059, 19.5008, 5.14994, 37.2998, -58.6209, 9.43288, 116.284, 24.3689, -2.2655, -79.7976, -13.5906, -4.101, 5.87854, -0.47124, -6.98181, -39.0657, -5.12252, 110.684, 12.0204, -28.4694, 57.1144, -5.19418, -17.3304, 77.857, 1.22431, -11.18, 57.664, 4.59176, 122, 8.21249, -30.5428, 54.4832, -10.2244, -16.542, 113.627, -9.3444, 0.727662, 51.5603, 9.90971, 109.32, 38.732, -15.6685, 9.37762, -8.30603, -17.6332, -12.0594, 1.83587, 121.001, -65.2335, -8.56317, 101.555, trial: 0, score: 2215 trial: 1, score: 3103 trial: 2, score: 1789 trial: 3, score: 2974 trial: 4, score: 3839 Policy 1: 26.1777, -55.2407, -3.39561, -0.642344, -51.3533, 88.4035, 18.2073, 53.4173, -52.5784, -5.74562, 83.4209, 31.7312, -22.868, -82.606, -4.82997, -10.8059, 19.5008, 5.14994, 37.2998, -58.6209, 9.43288, 116.284, 24.3689, -2.2655, -79.7976, -13.5906, -4.101, 5.87854, -0.47124, -6.98181, -39.0657, -5.12252, 110.684, 12.0204, -28.4694, 57.1144, -5.19418, -17.3304, 77.857, 1.22431, -11.18, 57.664, 4.59176, 122, 8.21249, -30.5428, 54.4832, -10.2244, -16.542, 113.627, -9.3444, 0.727662, 51.5603, 9.90971, 109.32, 38.732, -15.6685, 9.37762, -8.30603, -17.6332, -12.0594, 1.83587, 121.001, -65.2335, -8.56317, 101.555, Average Score: 2784 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.3091, -49.6599, -5.70762, -3.1303, -55, 86.6449, 9.74708, 50.1437, -48.5988, -10, 85.7999, 32.7597, -17.7136, -85.6866, -1.74101, -11.3523, 20.0648, 4.90744, 40.9127, -55.367, 10.1028, 121.763, 22.0067, -11.5081, -83.8622, -5.36357, -7.94543, 1.40487, 2.35772, -8.21898, -37.6402, -5.80886, 106.225, 12.7756, -26.7456, 59.4269, -3.08284, -19.3625, 75.7952, 3.20361, -14.7318, 59.0364, 5.58126, 122, 8.93747, -35.4197, 51.0941, -5.22524, -20.3912, 108.207, -7.46895, -0.539509, 50.7413, 12.3269, 110.056, 35.4336, -21.1404, 9.58212, -7.88558, -20.0348, -19.4519, 0.93527, 116.899, -67.6859, -9.0722, 107.935, trial: 0, score: 1958 trial: 1, score: 2333 trial: 2, score: 2173 trial: 3, score: 2814 trial: 4, score: 2077 Policy 1: 29.3091, -49.6599, -5.70762, -3.1303, -55, 86.6449, 9.74708, 50.1437, -48.5988, -10, 85.7999, 32.7597, -17.7136, -85.6866, -1.74101, -11.3523, 20.0648, 4.90744, 40.9127, -55.367, 10.1028, 121.763, 22.0067, -11.5081, -83.8622, -5.36357, -7.94543, 1.40487, 2.35772, -8.21898, -37.6402, -5.80886, 106.225, 12.7756, -26.7456, 59.4269, -3.08284, -19.3625, 75.7952, 3.20361, -14.7318, 59.0364, 5.58126, 122, 8.93747, -35.4197, 51.0941, -5.22524, -20.3912, 108.207, -7.46895, -0.539509, 50.7413, 12.3269, 110.056, 35.4336, -21.1404, 9.58212, -7.88558, -20.0348, -19.4519, 0.93527, 116.899, -67.6859, -9.0722, 107.935, Average Score: 2271 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.9954, -56.7776, -7.88599, -0.634796, -55, 81.7357, 13.2553, 52.8538, -50.2265, -10, 77.7468, 29.7555, -25.216, -78.5303, -6.4302, -4.64849, 14.1894, 3.11352, 40.7368, -55.7797, 11.0647, 118.747, 21.687, -8.97342, -86.0122, -8.53332, -4.70569, 2.68535, 5.76556, -11.7301, -41.8158, -5.3892, 107.514, 17.3353, -23.5623, 51.7541, -5.8551, -18.0812, 76.1999, 6.92278, -17.6291, 60.725, 0.932957, 122, 6.04836, -31.142, 53.1143, -9.06841, -18.0828, 110.425, -3.92597, -0.345123, 49.9637, 8.94911, 101.928, 38.7372, -13.4727, 16.329, -13.4963, -21.3191, -13.7019, 2.63091, 120.66, -70.2183, -7.10456, 104.911, trial: 0, score: 5222 trial: 1, score: 4125 trial: 2, score: 4509 trial: 3, score: 4317 trial: 4, score: 4477 Policy 1: 25.9954, -56.7776, -7.88599, -0.634796, -55, 81.7357, 13.2553, 52.8538, -50.2265, -10, 77.7468, 29.7555, -25.216, -78.5303, -6.4302, -4.64849, 14.1894, 3.11352, 40.7368, -55.7797, 11.0647, 118.747, 21.687, -8.97342, -86.0122, -8.53332, -4.70569, 2.68535, 5.76556, -11.7301, -41.8158, -5.3892, 107.514, 17.3353, -23.5623, 51.7541, -5.8551, -18.0812, 76.1999, 6.92278, -17.6291, 60.725, 0.932957, 122, 6.04836, -31.142, 53.1143, -9.06841, -18.0828, 110.425, -3.92597, -0.345123, 49.9637, 8.94911, 101.928, 38.7372, -13.4727, 16.329, -13.4963, -21.3191, -13.7019, 2.63091, 120.66, -70.2183, -7.10456, 104.911, Average Score: 4530 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.9191, -57.1995, -8.07931, -2.84182, -48.1831, 81.9332, 16.4711, 48.1229, -54.7962, -5.65741, 79.7455, 26.6907, -18.6577, -82.2146, -2.26541, -8.36548, 13.1647, 6.43419, 41.1799, -64.0193, 4.87531, 113.153, 26.3225, -8.92543, -84.1499, -13.089, -4.27752, -0.789408, 2.99048, -14.0154, -36.8262, -2.56392, 106.49, 16.6116, -22.0712, 54.3263, -7.04797, -15.7751, 70.9906, -1.63688, -17.7871, 61.2201, -0.672421, 120.689, 0.376922, -31.2135, 54.0837, -5.97685, -16.3158, 111.623, -4.92518, -1.33772, 43.7525, 5.61221, 107.585, 37.2423, -18.0892, 12.721, -11.1452, -18.5261, -19.6208, 6.0362, 116.158, -65.3854, -5.80345, 103.029, trial: 0, score: 3078 trial: 1, score: 4541 trial: 2, score: 5248 trial: 3, score: 4957 trial: 4, score: 4637 Policy 1: 26.9191, -57.1995, -8.07931, -2.84182, -48.1831, 81.9332, 16.4711, 48.1229, -54.7962, -5.65741, 79.7455, 26.6907, -18.6577, -82.2146, -2.26541, -8.36548, 13.1647, 6.43419, 41.1799, -64.0193, 4.87531, 113.153, 26.3225, -8.92543, -84.1499, -13.089, -4.27752, -0.789408, 2.99048, -14.0154, -36.8262, -2.56392, 106.49, 16.6116, -22.0712, 54.3263, -7.04797, -15.7751, 70.9906, -1.63688, -17.7871, 61.2201, -0.672421, 120.689, 0.376922, -31.2135, 54.0837, -5.97685, -16.3158, 111.623, -4.92518, -1.33772, 43.7525, 5.61221, 107.585, 37.2423, -18.0892, 12.721, -11.1452, -18.5261, -19.6208, 6.0362, 116.158, -65.3854, -5.80345, 103.029, Average Score: 4492.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.9811, -57.6492, -6.75937, -1.59715, -54.8726, 81.9394, 15.6503, 55.1445, -51.5193, -7.96224, 77.3493, 28.639, -20.2581, -85.4633, -5.55895, -11.4458, 15.3431, 1.75473, 38.4, -58.765, 4.94332, 115.113, 22.9794, -7.58324, -82.8757, -6.68148, -6.53404, 6.01625, 4.25512, -13.7671, -40.4069, -4.3274, 106.4, 11.3146, -27.226, 52.0072, -5.26236, -20.4189, 73.301, 4.464, -13.6525, 62.2145, 5.63121, 120.729, 7.16169, -33.3085, 51.1419, -2.70001, -18.0879, 109.173, -2.18353, 0.871335, 47.369, 13.1025, 101.746, 41.3404, -19.0189, 13.7926, -10.0065, -26.7396, -14.4653, 4.48295, 117.757, -68.9035, -6.40253, 103.327, trial: 0, score: 4654 trial: 1, score: 3069 trial: 2, score: 3389 trial: 3, score: 3422 trial: 4, score: 2717 Policy 1: 29.9811, -57.6492, -6.75937, -1.59715, -54.8726, 81.9394, 15.6503, 55.1445, -51.5193, -7.96224, 77.3493, 28.639, -20.2581, -85.4633, -5.55895, -11.4458, 15.3431, 1.75473, 38.4, -58.765, 4.94332, 115.113, 22.9794, -7.58324, -82.8757, -6.68148, -6.53404, 6.01625, 4.25512, -13.7671, -40.4069, -4.3274, 106.4, 11.3146, -27.226, 52.0072, -5.26236, -20.4189, 73.301, 4.464, -13.6525, 62.2145, 5.63121, 120.729, 7.16169, -33.3085, 51.1419, -2.70001, -18.0879, 109.173, -2.18353, 0.871335, 47.369, 13.1025, 101.746, 41.3404, -19.0189, 13.7926, -10.0065, -26.7396, -14.4653, 4.48295, 117.757, -68.9035, -6.40253, 103.327, Average Score: 3450.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.6895, -52.1708, -4.00198, -7.37038, -55, 90.8174, 13.2301, 48.6791, -51.0115, -10, 77.2245, 30.7465, -25.1281, -85.262, -8.34507, -13.0113, 19.9494, 5.99345, 39.0427, -54.8243, 3.96607, 115.572, 20.4235, -10.1309, -88, -6.11322, -12.0809, 5.72474, 7.29481, -9.82381, -38.7797, -9.50204, 110.392, 16.2505, -29.3169, 56.008, -10.4678, -18.4411, 77.6788, 0.127987, -17.2072, 56.6421, 4.0732, 120.149, 7.13221, -30.1945, 54.577, -3.01787, -20.2704, 113.127, -2.55248, 0.777557, 45.1382, 10.2439, 102.814, 33.4096, -20.2313, 11.8891, -11.9948, -20.5716, -15.7689, 1.31991, 113.606, -63.5032, -4.73466, 106.657, trial: 0, score: 4486 trial: 1, score: 4381 trial: 2, score: 2301 trial: 3, score: 4191 trial: 4, score: 4670 Policy 1: 30.6895, -52.1708, -4.00198, -7.37038, -55, 90.8174, 13.2301, 48.6791, -51.0115, -10, 77.2245, 30.7465, -25.1281, -85.262, -8.34507, -13.0113, 19.9494, 5.99345, 39.0427, -54.8243, 3.96607, 115.572, 20.4235, -10.1309, -88, -6.11322, -12.0809, 5.72474, 7.29481, -9.82381, -38.7797, -9.50204, 110.392, 16.2505, -29.3169, 56.008, -10.4678, -18.4411, 77.6788, 0.127987, -17.2072, 56.6421, 4.0732, 120.149, 7.13221, -30.1945, 54.577, -3.01787, -20.2704, 113.127, -2.55248, 0.777557, 45.1382, 10.2439, 102.814, 33.4096, -20.2313, 11.8891, -11.9948, -20.5716, -15.7689, 1.31991, 113.606, -63.5032, -4.73466, 106.657, Average Score: 4005.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.1793, -49.2822, -7.04102, -7.62142, -50.1757, 90.3813, 16.4745, 52.4161, -51.6707, -10, 83.9882, 29.8591, -18.1571, -79.8807, -11.389, -9.67534, 18.5408, 1.11034, 37.1313, -55.3936, 4.30705, 120.048, 24.3435, -11.1354, -83.2918, -6.77194, -6.82065, 3.29522, 6.28769, -9.18272, -41.3518, -1.91025, 109.696, 8.05528, -27.7929, 53.9468, -4.51912, -19.3254, 71.7463, -2.56193, -11.5937, 62.2683, 3.00593, 122, 4.6615, -30.8573, 49.8061, -6.34515, -22.3303, 107.583, -7.21086, 4.2218, 45.9888, 9.67619, 101.63, 39.9443, -19.2468, 14.4216, -8.53465, -20.2274, -13.8773, 6.89866, 120.64, -70.9817, -10, 105.316, trial: 0, score: 2310 trial: 1, score: 3101 trial: 2, score: 3229 trial: 3, score: 1759 trial: 4, score: 2077 Policy 1: 23.1793, -49.2822, -7.04102, -7.62142, -50.1757, 90.3813, 16.4745, 52.4161, -51.6707, -10, 83.9882, 29.8591, -18.1571, -79.8807, -11.389, -9.67534, 18.5408, 1.11034, 37.1313, -55.3936, 4.30705, 120.048, 24.3435, -11.1354, -83.2918, -6.77194, -6.82065, 3.29522, 6.28769, -9.18272, -41.3518, -1.91025, 109.696, 8.05528, -27.7929, 53.9468, -4.51912, -19.3254, 71.7463, -2.56193, -11.5937, 62.2683, 3.00593, 122, 4.6615, -30.8573, 49.8061, -6.34515, -22.3303, 107.583, -7.21086, 4.2218, 45.9888, 9.67619, 101.63, 39.9443, -19.2468, 14.4216, -8.53465, -20.2274, -13.8773, 6.89866, 120.64, -70.9817, -10, 105.316, Average Score: 2495.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.1098, -56.8567, -9.16556, -8.80702, -54.9768, 85.8901, 14.9848, 53.4616, -56.1007, -10, 78.804, 29.4969, -21.6626, -81.904, -9.53856, -6.65766, 14.3068, 0.344552, 36.9636, -62.1866, 5.88226, 113.477, 25.6297, -6.51954, -85.2073, -8.83104, -11.9355, 3.88934, 1.11394, -15.1808, -35.2287, -10, 104.302, 12.0772, -25.4557, 52.7217, -10.9525, -16.8643, 76.0724, 5.22775, -9.12333, 54.1535, 6.04715, 118.641, 8.57582, -33.7804, 51.0209, -4.14243, -18.1906, 108.195, -3.45983, -3.1358, 43.14, 11.8294, 104.286, 35.358, -22.6101, 9.75819, -9.57246, -25.5945, -19.834, -1.06918, 119.95, -70.0977, -5.81828, 107.9, trial: 0, score: 3496 trial: 1, score: 2981 trial: 2, score: 2496 trial: 3, score: 3510 trial: 4, score: 3997 Policy 1: 25.1098, -56.8567, -9.16556, -8.80702, -54.9768, 85.8901, 14.9848, 53.4616, -56.1007, -10, 78.804, 29.4969, -21.6626, -81.904, -9.53856, -6.65766, 14.3068, 0.344552, 36.9636, -62.1866, 5.88226, 113.477, 25.6297, -6.51954, -85.2073, -8.83104, -11.9355, 3.88934, 1.11394, -15.1808, -35.2287, -10, 104.302, 12.0772, -25.4557, 52.7217, -10.9525, -16.8643, 76.0724, 5.22775, -9.12333, 54.1535, 6.04715, 118.641, 8.57582, -33.7804, 51.0209, -4.14243, -18.1906, 108.195, -3.45983, -3.1358, 43.14, 11.8294, 104.286, 35.358, -22.6101, 9.75819, -9.57246, -25.5945, -19.834, -1.06918, 119.95, -70.0977, -5.81828, 107.9, Average Score: 3296 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.3544, -50.3857, -0.144316, -1.42971, -50.6117, 85.637, 9.65697, 54.5002, -52.2221, -10, 79.3986, 24.5822, -17.7846, -87.9263, -8.24362, -6.67288, 21.8697, -1.60487, 35.0062, -55.2456, 5.73628, 118.882, 24.2799, -1.84639, -84.106, -13.5684, -10.5927, 3.94646, 7.36104, -11.8482, -42.9275, -3.2759, 106.705, 15.0738, -30.073, 59.1896, -9.88267, -21.1227, 77.4468, 4.83396, -18.1762, 59.5363, 5.82933, 119.649, 5.60135, -26.5331, 53.3478, -9.60082, -16.9647, 105.529, -1.42715, -0.691226, 44.7486, 11.1364, 110.649, 38.1892, -20.0472, 10.6161, -14.014, -19.8622, -17.6367, -1.28477, 122, -63.0587, -7.65898, 107.982, trial: 0, score: 3230 trial: 1, score: 3933 trial: 2, score: 3646 trial: 3, score: 3478 trial: 4, score: 4157 Policy 1: 23.3544, -50.3857, -0.144316, -1.42971, -50.6117, 85.637, 9.65697, 54.5002, -52.2221, -10, 79.3986, 24.5822, -17.7846, -87.9263, -8.24362, -6.67288, 21.8697, -1.60487, 35.0062, -55.2456, 5.73628, 118.882, 24.2799, -1.84639, -84.106, -13.5684, -10.5927, 3.94646, 7.36104, -11.8482, -42.9275, -3.2759, 106.705, 15.0738, -30.073, 59.1896, -9.88267, -21.1227, 77.4468, 4.83396, -18.1762, 59.5363, 5.82933, 119.649, 5.60135, -26.5331, 53.3478, -9.60082, -16.9647, 105.529, -1.42715, -0.691226, 44.7486, 11.1364, 110.649, 38.1892, -20.0472, 10.6161, -14.014, -19.8622, -17.6367, -1.28477, 122, -63.0587, -7.65898, 107.982, Average Score: 3688.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.5063, -55.5535, -4.07019, -8.29147, -52.8262, 86.2038, 17.4202, 52.9177, -49.4231, -5.41347, 84.2604, 30.197, -18.955, -80.4626, -2.49199, -8.27853, 20.6153, 0.345666, 32.0402, -59.9897, 1.78696, 114.663, 27.2429, -6.28976, -88, -7.18876, -12.839, -1.14692, 0.913913, -8.69428, -41.2556, -10, 109.281, 10.9759, -29.2357, 59.0898, -8.67924, -19.4338, 73.3311, 7.09204, -13.3812, 61.8706, -0.671852, 118.586, 0, -32.8451, 58.4622, -3.03713, -16.0084, 113.864, -8.39212, 4.03108, 51.4795, 12.5531, 107.127, 34.821, -15.7547, 9.83393, -9.52997, -22.7086, -15.974, 7.6518, 116.593, -65.4265, -6.46074, 107.396, trial: 0, score: 3110 trial: 1, score: 3197 trial: 2, score: 2173 trial: 3, score: 3261 trial: 4, score: 3133 Policy 1: 30.5063, -55.5535, -4.07019, -8.29147, -52.8262, 86.2038, 17.4202, 52.9177, -49.4231, -5.41347, 84.2604, 30.197, -18.955, -80.4626, -2.49199, -8.27853, 20.6153, 0.345666, 32.0402, -59.9897, 1.78696, 114.663, 27.2429, -6.28976, -88, -7.18876, -12.839, -1.14692, 0.913913, -8.69428, -41.2556, -10, 109.281, 10.9759, -29.2357, 59.0898, -8.67924, -19.4338, 73.3311, 7.09204, -13.3812, 61.8706, -0.671852, 118.586, 0, -32.8451, 58.4622, -3.03713, -16.0084, 113.864, -8.39212, 4.03108, 51.4795, 12.5531, 107.127, 34.821, -15.7547, 9.83393, -9.52997, -22.7086, -15.974, 7.6518, 116.593, -65.4265, -6.46074, 107.396, Average Score: 2974.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.9259, -50.3744, -2.28585, -0.0203546, -50.596, 90.9089, 18.7181, 47.9489, -48.0578, -6.3954, 80.2153, 31.4592, -27.281, -81.4547, -10.0064, -13.5661, 19.1444, -1.60611, 35.3396, -59.264, 8.12393, 113.986, 21.8196, -5.10221, -88, -13.8172, -8.98042, 6.25186, 1.51615, -12.6119, -34.2854, -10, 112.757, 10.8719, -26.1573, 58.8247, -9.81607, -21.9405, 75.5544, 3.70316, -8.9105, 62.4645, -1.41541, 122, 2.64691, -25.9212, 52.2913, -7.51404, -21.9505, 107.784, -2.85359, -3.17211, 49.0697, 3.52386, 108.324, 35.9544, -19.764, 12.4195, -13.4085, -19.2501, -19.2072, 1.24707, 121.201, -65.0762, -3.39116, 105.816, trial: 0, score: 3686 trial: 1, score: 4575 trial: 2, score: 4317 trial: 3, score: 3966 trial: 4, score: 3862 Policy 1: 26.9259, -50.3744, -2.28585, -0.0203546, -50.596, 90.9089, 18.7181, 47.9489, -48.0578, -6.3954, 80.2153, 31.4592, -27.281, -81.4547, -10.0064, -13.5661, 19.1444, -1.60611, 35.3396, -59.264, 8.12393, 113.986, 21.8196, -5.10221, -88, -13.8172, -8.98042, 6.25186, 1.51615, -12.6119, -34.2854, -10, 112.757, 10.8719, -26.1573, 58.8247, -9.81607, -21.9405, 75.5544, 3.70316, -8.9105, 62.4645, -1.41541, 122, 2.64691, -25.9212, 52.2913, -7.51404, -21.9505, 107.784, -2.85359, -3.17211, 49.0697, 3.52386, 108.324, 35.9544, -19.764, 12.4195, -13.4085, -19.2501, -19.2072, 1.24707, 121.201, -65.0762, -3.39116, 105.816, Average Score: 4081.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.0004, -55.0873, -4.55294, -6.58841, -48.7121, 86.6252, 16.5397, 53.2408, -48.2349, -7.38485, 78.9725, 31.6993, -18.9633, -78.8445, -6.66748, -12.7811, 16.7614, 2.74658, 33.5318, -57.9464, 3.72711, 116.685, 26.1459, -1.85389, -88, -4.20067, -8.15689, 5.90787, 0.6653, -11.2067, -35.0518, -5.5081, 108.143, 13.9887, -28.4611, 53.1851, -4.62421, -21.7982, 76.9601, 4.21293, -8.80792, 62.6766, 3.63782, 122, 4.52909, -32.55, 50.1903, -6.61319, -20, 105.243, -8.26773, -2.30368, 51.0899, 7.22058, 107.782, 34.5619, -20.1829, 16.265, -8.53042, -21.6136, -16.8949, 4.60036, 117.746, -66.2263, -5.48364, 110.499, trial: 0, score: 2438 trial: 1, score: 3517 trial: 2, score: 3422 trial: 3, score: 3134 trial: 4, score: 3485 Policy 1: 25.0004, -55.0873, -4.55294, -6.58841, -48.7121, 86.6252, 16.5397, 53.2408, -48.2349, -7.38485, 78.9725, 31.6993, -18.9633, -78.8445, -6.66748, -12.7811, 16.7614, 2.74658, 33.5318, -57.9464, 3.72711, 116.685, 26.1459, -1.85389, -88, -4.20067, -8.15689, 5.90787, 0.6653, -11.2067, -35.0518, -5.5081, 108.143, 13.9887, -28.4611, 53.1851, -4.62421, -21.7982, 76.9601, 4.21293, -8.80792, 62.6766, 3.63782, 122, 4.52909, -32.55, 50.1903, -6.61319, -20, 105.243, -8.26773, -2.30368, 51.0899, 7.22058, 107.782, 34.5619, -20.1829, 16.265, -8.53042, -21.6136, -16.8949, 4.60036, 117.746, -66.2263, -5.48364, 110.499, Average Score: 3199.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.2172, -48.5177, -1.10147, -4.98325, -46.617, 81.4788, 16.8579, 49.9636, -49.413, -10, 76.6991, 31.6712, -18.9149, -78.4971, -9.58398, -10.5797, 21.7139, 1.43117, 39.3085, -64.4931, 10.792, 121.007, 24.2064, -7.40539, -82.0336, -13.0625, -6.84545, 6.47083, 2.9264, -11.7736, -38.9369, -6.25141, 107.168, 17.9208, -25.007, 59.7041, -8.17795, -24.1654, 71.12, 4.88776, -12.7453, 58.3376, 5.30032, 117.679, 7.23007, -27.5034, 58.6485, -11.7525, -17.7114, 111.981, -5.5313, -0.990699, 47.324, 12.8207, 103.514, 35.5863, -16.1875, 14.5695, -14.0892, -18.8833, -18.8213, 3.35131, 116.471, -67.317, -10, 107.881, trial: 0, score: 3686 trial: 1, score: 3517 trial: 2, score: 3805 trial: 3, score: 3999 trial: 4, score: 4062 Policy 1: 27.2172, -48.5177, -1.10147, -4.98325, -46.617, 81.4788, 16.8579, 49.9636, -49.413, -10, 76.6991, 31.6712, -18.9149, -78.4971, -9.58398, -10.5797, 21.7139, 1.43117, 39.3085, -64.4931, 10.792, 121.007, 24.2064, -7.40539, -82.0336, -13.0625, -6.84545, 6.47083, 2.9264, -11.7736, -38.9369, -6.25141, 107.168, 17.9208, -25.007, 59.7041, -8.17795, -24.1654, 71.12, 4.88776, -12.7453, 58.3376, 5.30032, 117.679, 7.23007, -27.5034, 58.6485, -11.7525, -17.7114, 111.981, -5.5313, -0.990699, 47.324, 12.8207, 103.514, 35.5863, -16.1875, 14.5695, -14.0892, -18.8833, -18.8213, 3.35131, 116.471, -67.317, -10, 107.881, Average Score: 3813.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.2336, -51.4508, 0.0569506, -5.5947, -46.586, 90.0357, 9.27535, 56.3095, -55.0476, -10, 78.1577, 32.5185, -23.4621, -83.682, -2.78583, -10.3284, 17.4148, -2.54851, 31.5622, -62.6583, 7.10594, 115.766, 28.5837, -5.9271, -84.6773, -8.2057, -10.0299, 1.57735, 5.02618, -15.4562, -39.7186, -8.73066, 109.707, 8.59325, -21.1483, 57.695, -9.46977, -19.4843, 72.6139, 4.83945, -14.5349, 56.2897, 0.312056, 122, 8.53904, -30.8151, 55.0677, -11.1384, -16.2279, 112.88, -1.22732, -1.89405, 47.1089, 10.4881, 107.224, 32.7576, -14.1031, 12.7725, -5.19663, -24.8845, -13.3117, 0.738558, 117.12, -70.3899, -10, 109.849, trial: 0, score: 3366 trial: 1, score: 4189 trial: 2, score: 3037 trial: 3, score: 2846 trial: 4, score: 3702 Policy 1: 22.2336, -51.4508, 0.0569506, -5.5947, -46.586, 90.0357, 9.27535, 56.3095, -55.0476, -10, 78.1577, 32.5185, -23.4621, -83.682, -2.78583, -10.3284, 17.4148, -2.54851, 31.5622, -62.6583, 7.10594, 115.766, 28.5837, -5.9271, -84.6773, -8.2057, -10.0299, 1.57735, 5.02618, -15.4562, -39.7186, -8.73066, 109.707, 8.59325, -21.1483, 57.695, -9.46977, -19.4843, 72.6139, 4.83945, -14.5349, 56.2897, 0.312056, 122, 8.53904, -30.8151, 55.0677, -11.1384, -16.2279, 112.88, -1.22732, -1.89405, 47.1089, 10.4881, 107.224, 32.7576, -14.1031, 12.7725, -5.19663, -24.8845, -13.3117, 0.738558, 117.12, -70.3899, -10, 109.849, Average Score: 3428 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.9903, -55.338, -7.47214, -0.355691, -53.0729, 81.9202, 11.5563, 53.5177, -47.9685, -5.73636, 86.0738, 25.9528, -25.5398, -87.7496, -8.60421, -14.0484, 19.3934, -2.6758, 31.919, -58.5534, 6.71422, 113.549, 21.0092, -2.05186, -86.7976, -13.829, -11.5328, 2.20268, 7.41524, -9.1051, -36.0416, -6.51651, 109.779, 9.59661, -29.7174, 57.6723, -4.52712, -19.0086, 75.1643, 4.76441, -12.3752, 62.4625, -0.864035, 122, 4.75058, -34.8854, 53.7748, -5.82689, -20.5279, 108.979, -7.86198, 3.14293, 48.7058, 10.7114, 104.878, 39.876, -22.1376, 15.5548, -8.09338, -26.6675, -15.8546, 0.546251, 119.171, -72.1483, -4.45367, 107.98, trial: 0, score: 1735 trial: 1, score: 1757 trial: 2, score: 1918 trial: 3, score: 1565 trial: 4, score: 1853 Policy 1: 30.9903, -55.338, -7.47214, -0.355691, -53.0729, 81.9202, 11.5563, 53.5177, -47.9685, -5.73636, 86.0738, 25.9528, -25.5398, -87.7496, -8.60421, -14.0484, 19.3934, -2.6758, 31.919, -58.5534, 6.71422, 113.549, 21.0092, -2.05186, -86.7976, -13.829, -11.5328, 2.20268, 7.41524, -9.1051, -36.0416, -6.51651, 109.779, 9.59661, -29.7174, 57.6723, -4.52712, -19.0086, 75.1643, 4.76441, -12.3752, 62.4625, -0.864035, 122, 4.75058, -34.8854, 53.7748, -5.82689, -20.5279, 108.979, -7.86198, 3.14293, 48.7058, 10.7114, 104.878, 39.876, -22.1376, 15.5548, -8.09338, -26.6675, -15.8546, 0.546251, 119.171, -72.1483, -4.45367, 107.98, Average Score: 1765.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.2897, -54.0693, -4.79054, -9.84777, -48.7361, 88.8882, 16.6703, 52.4404, -48.7208, -6.26452, 79.8554, 28.0316, -24.6192, -86.6419, -2.79528, -13.0083, 18.3222, 0.922784, 38.7989, -58.2471, 10.3417, 115.944, 28.0647, -1.97246, -86.6853, -7.08387, -10.5888, -2.49336, 4.95921, -5.67482, -34.6217, -7.3088, 110.17, 13.6424, -27.2264, 53.0789, -6.52186, -16.1183, 74.8017, 1.18159, -10.3097, 56.67, 4.82696, 122, 5.53238, -27.2357, 55.4816, -6.74813, -20.3789, 104.745, -5.69658, 5.95482, 45.4191, 3.28067, 111.185, 39.0252, -17.1646, 17.0358, -14.5827, -26.0998, -11.3345, -0.681542, 113.214, -71.6018, -7.50288, 105.53, trial: 0, score: 4262 trial: 1, score: 4541 trial: 2, score: 2078 trial: 3, score: 4000 trial: 4, score: 2781 Policy 1: 23.2897, -54.0693, -4.79054, -9.84777, -48.7361, 88.8882, 16.6703, 52.4404, -48.7208, -6.26452, 79.8554, 28.0316, -24.6192, -86.6419, -2.79528, -13.0083, 18.3222, 0.922784, 38.7989, -58.2471, 10.3417, 115.944, 28.0647, -1.97246, -86.6853, -7.08387, -10.5888, -2.49336, 4.95921, -5.67482, -34.6217, -7.3088, 110.17, 13.6424, -27.2264, 53.0789, -6.52186, -16.1183, 74.8017, 1.18159, -10.3097, 56.67, 4.82696, 122, 5.53238, -27.2357, 55.4816, -6.74813, -20.3789, 104.745, -5.69658, 5.95482, 45.4191, 3.28067, 111.185, 39.0252, -17.1646, 17.0358, -14.5827, -26.0998, -11.3345, -0.681542, 113.214, -71.6018, -7.50288, 105.53, Average Score: 3532.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.3979, -54.0022, -0.880328, -2.42221, -55, 89.4023, 14.2771, 53.5448, -51.5765, -7.5409, 82.7573, 30.907, -25.5803, -85.2425, -9.30819, -6.47466, 14.2814, -1.98095, 39.4822, -61.6123, 8.55723, 112.678, 27.8296, -8.50676, -87.5728, -11.8735, -9.35006, 0.536221, 3.69016, -11.3859, -37.1333, -3.21477, 110.168, 9.5974, -29.388, 55.3321, -5.56129, -22.328, 70.9413, 1.97119, -15.208, 59.8777, -1.28798, 119.598, 1.13551, -34.717, 57.792, -3.83882, -20.0878, 108.584, -1.22153, -2.87733, 47.1037, 11.9741, 101.952, 38.9215, -15.0597, 11.681, -11.7521, -23.1929, -11.3371, -1.41669, 115.521, -66.1019, -10, 108.338, trial: 0, score: 1629 trial: 1, score: 2301 trial: 2, score: 4191 trial: 3, score: 4703 trial: 4, score: 3517 Policy 1: 29.3979, -54.0022, -0.880328, -2.42221, -55, 89.4023, 14.2771, 53.5448, -51.5765, -7.5409, 82.7573, 30.907, -25.5803, -85.2425, -9.30819, -6.47466, 14.2814, -1.98095, 39.4822, -61.6123, 8.55723, 112.678, 27.8296, -8.50676, -87.5728, -11.8735, -9.35006, 0.536221, 3.69016, -11.3859, -37.1333, -3.21477, 110.168, 9.5974, -29.388, 55.3321, -5.56129, -22.328, 70.9413, 1.97119, -15.208, 59.8777, -1.28798, 119.598, 1.13551, -34.717, 57.792, -3.83882, -20.0878, 108.584, -1.22153, -2.87733, 47.1037, 11.9741, 101.952, 38.9215, -15.0597, 11.681, -11.7521, -23.1929, -11.3371, -1.41669, 115.521, -66.1019, -10, 108.338, Average Score: 3268.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.7093, -49.832, -6.94594, -7.98631, -47.7418, 83.3054, 15.7267, 47.9422, -55.3363, -7.21861, 76.78, 28.4362, -17.4008, -84.1653, -2.40438, -4.35795, 20.483, 0.416253, 33.9644, -63.3796, 2.87046, 121.786, 23.813, -4.08625, -79.659, -6.00118, -8.41776, 4.67238, 4.08489, -12.5138, -41.3594, -9.22857, 108.681, 12.5089, -21.4871, 52.7794, -5.02115, -19.3989, 73.7579, 0.576504, -11.6922, 61.943, -0.979839, 118.143, 4.62811, -26.7779, 57.4591, -11.1208, -22.3841, 110.968, -2.02714, 0.166854, 48.7237, 6.64517, 107.072, 34.5458, -14.5141, 15.0933, -5.54944, -25.665, -15.736, 3.0744, 114.15, -70.481, -10, 108.248, trial: 0, score: 4509 trial: 1, score: 4093 trial: 2, score: 4318 trial: 3, score: 4637 trial: 4, score: 4573 Policy 1: 29.7093, -49.832, -6.94594, -7.98631, -47.7418, 83.3054, 15.7267, 47.9422, -55.3363, -7.21861, 76.78, 28.4362, -17.4008, -84.1653, -2.40438, -4.35795, 20.483, 0.416253, 33.9644, -63.3796, 2.87046, 121.786, 23.813, -4.08625, -79.659, -6.00118, -8.41776, 4.67238, 4.08489, -12.5138, -41.3594, -9.22857, 108.681, 12.5089, -21.4871, 52.7794, -5.02115, -19.3989, 73.7579, 0.576504, -11.6922, 61.943, -0.979839, 118.143, 4.62811, -26.7779, 57.4591, -11.1208, -22.3841, 110.968, -2.02714, 0.166854, 48.7237, 6.64517, 107.072, 34.5458, -14.5141, 15.0933, -5.54944, -25.665, -15.736, 3.0744, 114.15, -70.481, -10, 108.248, Average Score: 4426 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.4081, -57.0936, -3.6219, -9.08493, -49.9779, 83.4837, 18.0615, 56.4563, -48.2705, -8.41635, 82.7563, 28.3306, -20.7191, -87.8264, -9.13482, -10.4097, 14.0516, -1.01345, 35.6219, -60.6614, 8.34319, 113.661, 28.1966, -11.069, -85.2727, -11.0112, -9.48275, 4.36409, 5.95497, -15.0286, -37.0594, -7.42443, 103.755, 14.2552, -22.0773, 55.5109, -9.06662, -19.331, 75.0436, -0.556125, -15.0352, 57.4174, 0.892738, 120.485, 4.15757, -30.6349, 49.9698, -11.7653, -22.5593, 112.356, -7.0582, 2.8166, 47.4785, 7.33156, 104.459, 32.0917, -22.0309, 10.3831, -14.7199, -26.2002, -13.5814, 6.95034, 120.785, -68.862, -6.70584, 108.014, trial: 0, score: 2269 trial: 1, score: 2911 trial: 2, score: 2653 trial: 3, score: 2015 trial: 4, score: 3391 Policy 1: 29.4081, -57.0936, -3.6219, -9.08493, -49.9779, 83.4837, 18.0615, 56.4563, -48.2705, -8.41635, 82.7563, 28.3306, -20.7191, -87.8264, -9.13482, -10.4097, 14.0516, -1.01345, 35.6219, -60.6614, 8.34319, 113.661, 28.1966, -11.069, -85.2727, -11.0112, -9.48275, 4.36409, 5.95497, -15.0286, -37.0594, -7.42443, 103.755, 14.2552, -22.0773, 55.5109, -9.06662, -19.331, 75.0436, -0.556125, -15.0352, 57.4174, 0.892738, 120.485, 4.15757, -30.6349, 49.9698, -11.7653, -22.5593, 112.356, -7.0582, 2.8166, 47.4785, 7.33156, 104.459, 32.0917, -22.0309, 10.3831, -14.7199, -26.2002, -13.5814, 6.95034, 120.785, -68.862, -6.70584, 108.014, Average Score: 2647.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.3125, -57.2389, 0.522175, -3.2069, -52.2567, 83.4797, 9.23308, 55.8916, -52.5949, -10, 85.0072, 24.1539, -24.0809, -85.1188, -10.8212, -13.7806, 18.4749, 2.77831, 37.0687, -62.8596, 3.28305, 117.273, 25.6575, -7.25474, -86.9269, -5.63538, -11.7606, -1.66828, 4.42281, -6.14373, -37.1433, -9.74743, 111.447, 13.412, -30.1423, 58.4064, -3.92067, -24.9175, 75.2536, 3.04496, -15.3904, 58.3208, 5.29171, 118.199, 5.9881, -30.3137, 55.7716, -2.13301, -20.4924, 106.918, -5.28177, 3.21743, 44.5397, 11.8786, 106.418, 33.8389, -15.5068, 8.50271, -13.705, -19.7461, -12.9371, 3.10029, 113.34, -71.983, -9.23745, 106.881, trial: 0, score: 4157 trial: 1, score: 2333 trial: 2, score: 3325 trial: 3, score: 3487 trial: 4, score: 4029 Policy 1: 30.3125, -57.2389, 0.522175, -3.2069, -52.2567, 83.4797, 9.23308, 55.8916, -52.5949, -10, 85.0072, 24.1539, -24.0809, -85.1188, -10.8212, -13.7806, 18.4749, 2.77831, 37.0687, -62.8596, 3.28305, 117.273, 25.6575, -7.25474, -86.9269, -5.63538, -11.7606, -1.66828, 4.42281, -6.14373, -37.1433, -9.74743, 111.447, 13.412, -30.1423, 58.4064, -3.92067, -24.9175, 75.2536, 3.04496, -15.3904, 58.3208, 5.29171, 118.199, 5.9881, -30.3137, 55.7716, -2.13301, -20.4924, 106.918, -5.28177, 3.21743, 44.5397, 11.8786, 106.418, 33.8389, -15.5068, 8.50271, -13.705, -19.7461, -12.9371, 3.10029, 113.34, -71.983, -9.23745, 106.881, Average Score: 3466.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.6802, -54.7706, -3.58022, -9.06088, -47.6035, 84.1838, 18.171, 51.7526, -53.687, -9.3494, 81.3041, 31.8935, -18.2006, -86.3241, -6.0742, -8.40772, 15.0009, 5.98877, 31.6165, -63.1675, 1.7616, 120.853, 25.8943, -3.29379, -80.25, -9.48428, -7.02689, -0.46865, -1.74935, -11.7581, -42.4126, -3.85317, 106.792, 11.2952, -30.6456, 55.9287, -11.0758, -18.285, 77.9761, 0.199489, -11.3207, 53.7148, -2.15338, 120.517, 5.96048, -34.0787, 50.0693, -8.83766, -15.7103, 109.292, -6.84332, 3.53591, 45.7863, 10.7119, 106.584, 34.4225, -20.4362, 15.3597, -13.7594, -17.7314, -15.3921, 2.86623, 121.623, -63.7944, -10, 107.889, trial: 0, score: 2103 trial: 1, score: 2431 trial: 2, score: 4406 trial: 3, score: 4318 trial: 4, score: 4189 Policy 1: 31.6802, -54.7706, -3.58022, -9.06088, -47.6035, 84.1838, 18.171, 51.7526, -53.687, -9.3494, 81.3041, 31.8935, -18.2006, -86.3241, -6.0742, -8.40772, 15.0009, 5.98877, 31.6165, -63.1675, 1.7616, 120.853, 25.8943, -3.29379, -80.25, -9.48428, -7.02689, -0.46865, -1.74935, -11.7581, -42.4126, -3.85317, 106.792, 11.2952, -30.6456, 55.9287, -11.0758, -18.285, 77.9761, 0.199489, -11.3207, 53.7148, -2.15338, 120.517, 5.96048, -34.0787, 50.0693, -8.83766, -15.7103, 109.292, -6.84332, 3.53591, 45.7863, 10.7119, 106.584, 34.4225, -20.4362, 15.3597, -13.7594, -17.7314, -15.3921, 2.86623, 121.623, -63.7944, -10, 107.889, Average Score: 3489.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.8189, -51.4196, -6.6941, -8.52774, -53.0748, 84.1626, 15.003, 51.2829, -55.9038, -9.27503, 76.6773, 29.7003, -20.48, -84.1078, -1.65216, -5.63242, 14.5326, 4.56124, 39.1025, -60.0958, 2.71761, 119.131, 20.2091, -5.41099, -85.7847, -10.3613, -9.68198, 3.18303, 6.40359, -8.77635, -42.3363, -5.82742, 113.414, 16.2681, -25.2916, 56.7243, -5.26454, -16.8668, 70.4474, 6.18708, -14.2584, 57.4323, 6.54069, 120.134, 8.43089, -25.7155, 55.4992, -11.4261, -21.6558, 107.506, -9.51958, -0.101485, 46.5752, 7.73328, 109.403, 40.8235, -19.7417, 12.9406, -13.4564, -25.0126, -17.809, 2.21016, 120.516, -65.3835, -10, 103.224, trial: 0, score: 2333 trial: 1, score: 2527 trial: 2, score: 1981 trial: 3, score: 2751 trial: 4, score: 3677 Policy 1: 22.8189, -51.4196, -6.6941, -8.52774, -53.0748, 84.1626, 15.003, 51.2829, -55.9038, -9.27503, 76.6773, 29.7003, -20.48, -84.1078, -1.65216, -5.63242, 14.5326, 4.56124, 39.1025, -60.0958, 2.71761, 119.131, 20.2091, -5.41099, -85.7847, -10.3613, -9.68198, 3.18303, 6.40359, -8.77635, -42.3363, -5.82742, 113.414, 16.2681, -25.2916, 56.7243, -5.26454, -16.8668, 70.4474, 6.18708, -14.2584, 57.4323, 6.54069, 120.134, 8.43089, -25.7155, 55.4992, -11.4261, -21.6558, 107.506, -9.51958, -0.101485, 46.5752, 7.73328, 109.403, 40.8235, -19.7417, 12.9406, -13.4564, -25.0126, -17.809, 2.21016, 120.516, -65.3835, -10, 103.224, Average Score: 2653.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.7466, -54.5823, -7.50444, -4.62614, -51.5404, 82.2814, 18.7265, 50.1386, -52.4641, -7.9042, 81.6276, 28.4982, -26.0616, -84.9201, -4.97827, -9.94254, 15.5698, 1.63773, 33.2094, -61.1456, 9.84845, 119.103, 27.7356, -7.77, -81.2887, -6.38436, -10.369, -3.21109, 3.29866, -12.8259, -33.5972, -7.21777, 108.081, 11.6746, -29.4763, 60.4688, -7.16825, -20.2613, 76.4892, 2.76675, -10.5801, 58.8495, 3.69438, 121.59, 2.16688, -35.1851, 50.8529, -5.36849, -16.4753, 110.833, -8.86271, -2.84457, 47.8475, 5.65671, 103.299, 38.8617, -15.6236, 9.30621, -12.9722, -21.1472, -13.5538, 0.799244, 118.502, -68.4195, -10, 101.163, trial: 0, score: 2269 trial: 1, score: 1629 trial: 2, score: 1855 trial: 3, score: 2013 trial: 4, score: 2365 Policy 1: 30.7466, -54.5823, -7.50444, -4.62614, -51.5404, 82.2814, 18.7265, 50.1386, -52.4641, -7.9042, 81.6276, 28.4982, -26.0616, -84.9201, -4.97827, -9.94254, 15.5698, 1.63773, 33.2094, -61.1456, 9.84845, 119.103, 27.7356, -7.77, -81.2887, -6.38436, -10.369, -3.21109, 3.29866, -12.8259, -33.5972, -7.21777, 108.081, 11.6746, -29.4763, 60.4688, -7.16825, -20.2613, 76.4892, 2.76675, -10.5801, 58.8495, 3.69438, 121.59, 2.16688, -35.1851, 50.8529, -5.36849, -16.4753, 110.833, -8.86271, -2.84457, 47.8475, 5.65671, 103.299, 38.8617, -15.6236, 9.30621, -12.9722, -21.1472, -13.5538, 0.799244, 118.502, -68.4195, -10, 101.163, Average Score: 2026.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 28.3629, -52.427, -2.36948, -6.99995, -47.7075, 87.2779, 14.0538, 54.7737, -53.4844, -10, 79.5866, 28.0751, -26.3451, -80.2173, -5.83232, -11.1688, 16.313, 6.48028, 33.3333, -63.6781, 5.87528, 121.245, 24.5432, -6.48816, -80.644, -8.3369, -10.5067, 0.27407, 6.96101, -11.4111, -34.4391, -10, 105.793, 17.3744, -22.6198, 59.0159, -2.7848, -21.4491, 73.2596, -1.33008, -13.9439, 60.6613, -2.57346, 121.443, 1.12464, -31.0748, 50.0071, -5.67398, -16.0768, 107.218, -7.71466, -0.194693, 45.1561, 4.47661, 110.04, 38.6031, -18.4285, 14.1126, -12.5282, -19.236, -13.088, 4.57026, 119.211, -66.9531, -9.11512, 105.776, trial: 0, score: 4797 trial: 1, score: 2495 trial: 2, score: 4285 trial: 3, score: 3165 trial: 4, score: 3063 Policy 1: 28.3629, -52.427, -2.36948, -6.99995, -47.7075, 87.2779, 14.0538, 54.7737, -53.4844, -10, 79.5866, 28.0751, -26.3451, -80.2173, -5.83232, -11.1688, 16.313, 6.48028, 33.3333, -63.6781, 5.87528, 121.245, 24.5432, -6.48816, -80.644, -8.3369, -10.5067, 0.27407, 6.96101, -11.4111, -34.4391, -10, 105.793, 17.3744, -22.6198, 59.0159, -2.7848, -21.4491, 73.2596, -1.33008, -13.9439, 60.6613, -2.57346, 121.443, 1.12464, -31.0748, 50.0071, -5.67398, -16.0768, 107.218, -7.71466, -0.194693, 45.1561, 4.47661, 110.04, 38.6031, -18.4285, 14.1126, -12.5282, -19.236, -13.088, 4.57026, 119.211, -66.9531, -9.11512, 105.776, Average Score: 3561 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.0616, -57.5515, -6.57768, -1.47909, -55, 89.7594, 13.722, 56.4389, -52.8863, -7.53731, 85.0794, 30.949, -22.5792, -81.8225, -4.86036, -4.70848, 14.4192, 0.386627, 33.8318, -57.2956, 10.098, 122, 24.0993, -8.91201, -86.9866, -10.2171, -12.2793, 6.14831, 6.68925, -14.8297, -40.5735, -8.78671, 107.43, 16.5437, -22.5698, 51.9738, -10.5411, -19.1808, 74.1184, 5.68345, -10.0294, 59.4738, -1.09777, 122, 0.450528, -33.3333, 52.5971, -8.83425, -18.7855, 110.642, -7.75993, 2.55596, 52.7313, 9.2231, 110.018, 38.7402, -21.4008, 7.45472, -6.16618, -23.4432, -19.9572, 6.38463, 122, -63.0628, -10, 103.078, trial: 0, score: 2461 trial: 1, score: 2269 trial: 2, score: 2461 trial: 3, score: 2109 trial: 4, score: 2238 Policy 1: 30.0616, -57.5515, -6.57768, -1.47909, -55, 89.7594, 13.722, 56.4389, -52.8863, -7.53731, 85.0794, 30.949, -22.5792, -81.8225, -4.86036, -4.70848, 14.4192, 0.386627, 33.8318, -57.2956, 10.098, 122, 24.0993, -8.91201, -86.9866, -10.2171, -12.2793, 6.14831, 6.68925, -14.8297, -40.5735, -8.78671, 107.43, 16.5437, -22.5698, 51.9738, -10.5411, -19.1808, 74.1184, 5.68345, -10.0294, 59.4738, -1.09777, 122, 0.450528, -33.3333, 52.5971, -8.83425, -18.7855, 110.642, -7.75993, 2.55596, 52.7313, 9.2231, 110.018, 38.7402, -21.4008, 7.45472, -6.16618, -23.4432, -19.9572, 6.38463, 122, -63.0628, -10, 103.078, Average Score: 2307.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.779, -57.6179, -7.72105, -0.346682, -50.7279, 87.7735, 9.39469, 52.604, -52.645, -10, 84.7833, 32.4529, -19.0656, -87.2945, -10.9889, -11.2517, 21.931, 4.52439, 36.7504, -58.431, 6.77263, 121.494, 28.2489, -7.07325, -84.1519, -13.3886, -4.39486, 0.864157, 2.92318, -11.4366, -38.1508, -9.96872, 109.372, 10.1212, -21.0245, 51.0401, -6.53297, -22.2682, 74.9087, 1.86206, -9.05126, 56.9043, -0.858639, 122, 2.62946, -31.9577, 55.542, -7.06101, -17.6144, 106.712, -0.0165538, 0.288821, 43.646, 11.0593, 108.262, 33.2479, -21.229, 16.1373, -8.78789, -20.5707, -13.8894, 2.0582, 120.163, -64.0503, -3.90689, 106.403, trial: 0, score: 4288 trial: 1, score: 4214 trial: 2, score: 4446 trial: 3, score: 3935 trial: 4, score: 3934 Policy 1: 27.779, -57.6179, -7.72105, -0.346682, -50.7279, 87.7735, 9.39469, 52.604, -52.645, -10, 84.7833, 32.4529, -19.0656, -87.2945, -10.9889, -11.2517, 21.931, 4.52439, 36.7504, -58.431, 6.77263, 121.494, 28.2489, -7.07325, -84.1519, -13.3886, -4.39486, 0.864157, 2.92318, -11.4366, -38.1508, -9.96872, 109.372, 10.1212, -21.0245, 51.0401, -6.53297, -22.2682, 74.9087, 1.86206, -9.05126, 56.9043, -0.858639, 122, 2.62946, -31.9577, 55.542, -7.06101, -17.6144, 106.712, -0.0165538, 0.288821, 43.646, 11.0593, 108.262, 33.2479, -21.229, 16.1373, -8.78789, -20.5707, -13.8894, 2.0582, 120.163, -64.0503, -3.90689, 106.403, Average Score: 4163.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.775, -55.169, -7.55139, -3.63681, -53.3584, 87.6323, 9.16415, 49.7317, -54.651, -10, 80.9842, 31.8193, -22.1595, -80.438, -10.4298, -13.3499, 16.3139, 7.15672, 34.6053, -57.1495, 7.34874, 118.856, 29.3759, -8.94449, -83.9535, -6.24088, -13.2731, -0.589008, 3.78181, -8.14394, -42.0718, -2.8452, 109.34, 17.0486, -25.2718, 51.0749, -4.59837, -17.9062, 70.6341, 3.7907, -14.3421, 59.887, 7.03737, 120.333, 1.53895, -34.7921, 50.2967, -5.08134, -15.4285, 108.679, -9.51631, -3.2225, 45.1127, 10.9758, 105.895, 41.4105, -18.2949, 9.55663, -6.21915, -24.5545, -20.1522, 7.34819, 121.945, -70.3381, -4.54579, 106.508, trial: 0, score: 3071 trial: 1, score: 2493 trial: 2, score: 3421 trial: 3, score: 4413 trial: 4, score: 2333 Policy 1: 25.775, -55.169, -7.55139, -3.63681, -53.3584, 87.6323, 9.16415, 49.7317, -54.651, -10, 80.9842, 31.8193, -22.1595, -80.438, -10.4298, -13.3499, 16.3139, 7.15672, 34.6053, -57.1495, 7.34874, 118.856, 29.3759, -8.94449, -83.9535, -6.24088, -13.2731, -0.589008, 3.78181, -8.14394, -42.0718, -2.8452, 109.34, 17.0486, -25.2718, 51.0749, -4.59837, -17.9062, 70.6341, 3.7907, -14.3421, 59.887, 7.03737, 120.333, 1.53895, -34.7921, 50.2967, -5.08134, -15.4285, 108.679, -9.51631, -3.2225, 45.1127, 10.9758, 105.895, 41.4105, -18.2949, 9.55663, -6.21915, -24.5545, -20.1522, 7.34819, 121.945, -70.3381, -4.54579, 106.508, Average Score: 3146.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.5055, -52.453, -4.79259, -3.11533, -46.9358, 82.512, 14.4636, 48.1857, -51.9901, -8.74513, 79.4186, 32.7314, -19.5345, -86.4611, -8.70801, -12.0874, 18.4578, 6.74953, 34.1, -64.5076, 9.83811, 114.429, 27.9939, -8.62702, -84.1132, -13.5257, -10.2132, 0.610471, -0.603009, -12.7107, -41.0667, -10, 104.154, 17.8366, -20.8982, 53.4482, -10.9893, -15.5949, 71.7497, -0.00692499, -9.24458, 63.4075, 6.22627, 118.577, 0, -26.6357, 52.643, -6.20261, -17.7499, 108.897, -7.08302, -1.89689, 45.2398, 6.07563, 106.369, 40.0349, -18.8744, 10.835, -11.4545, -25.0166, -13.7942, 4.60657, 114.018, -68.3241, -7.46407, 109.798, trial: 0, score: 3751 trial: 1, score: 3645 trial: 2, score: 3997 trial: 3, score: 2909 trial: 4, score: 3997 Policy 1: 26.5055, -52.453, -4.79259, -3.11533, -46.9358, 82.512, 14.4636, 48.1857, -51.9901, -8.74513, 79.4186, 32.7314, -19.5345, -86.4611, -8.70801, -12.0874, 18.4578, 6.74953, 34.1, -64.5076, 9.83811, 114.429, 27.9939, -8.62702, -84.1132, -13.5257, -10.2132, 0.610471, -0.603009, -12.7107, -41.0667, -10, 104.154, 17.8366, -20.8982, 53.4482, -10.9893, -15.5949, 71.7497, -0.00692499, -9.24458, 63.4075, 6.22627, 118.577, 0, -26.6357, 52.643, -6.20261, -17.7499, 108.897, -7.08302, -1.89689, 45.2398, 6.07563, 106.369, 40.0349, -18.8744, 10.835, -11.4545, -25.0166, -13.7942, 4.60657, 114.018, -68.3241, -7.46407, 109.798, Average Score: 3659.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 24.4306, -51.3648, 0.565289, -6.14125, -54.7173, 84.5481, 16.6377, 55.701, -46.594, -5.76249, 83.9218, 29.8458, -25.9153, -78.5493, -3.69578, -11.3925, 16.5398, 5.09413, 37.6622, -57.0251, 5.67993, 114.128, 26.8932, -11.3457, -82.5451, -4.5475, -11.1524, 6.32064, 2.5904, -6.34007, -36.002, -8.31657, 107.493, 12.0985, -28.1532, 59.868, -2.95258, -24.8542, 74.3136, -1.2117, -15.8066, 62.1659, 5.40868, 122, 6.63362, -28.5323, 53.6671, -8.85397, -19.5874, 114.133, -7.30614, 4.88386, 48.8432, 6.36847, 102.496, 35.8876, -19.4267, 15.1349, -12.542, -23.7019, -11.6568, 2.87358, 119.169, -66.3779, -10, 111.041, trial: 0, score: 2365 trial: 1, score: 2205 trial: 2, score: 1981 trial: 3, score: 1597 trial: 4, score: 2013 Policy 1: 24.4306, -51.3648, 0.565289, -6.14125, -54.7173, 84.5481, 16.6377, 55.701, -46.594, -5.76249, 83.9218, 29.8458, -25.9153, -78.5493, -3.69578, -11.3925, 16.5398, 5.09413, 37.6622, -57.0251, 5.67993, 114.128, 26.8932, -11.3457, -82.5451, -4.5475, -11.1524, 6.32064, 2.5904, -6.34007, -36.002, -8.31657, 107.493, 12.0985, -28.1532, 59.868, -2.95258, -24.8542, 74.3136, -1.2117, -15.8066, 62.1659, 5.40868, 122, 6.63362, -28.5323, 53.6671, -8.85397, -19.5874, 114.133, -7.30614, 4.88386, 48.8432, 6.36847, 102.496, 35.8876, -19.4267, 15.1349, -12.542, -23.7019, -11.6568, 2.87358, 119.169, -66.3779, -10, 111.041, Average Score: 2032.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.8542, -53.6808, -2.98051, -8.68181, -50.4884, 89.0061, 16.7267, 47.5042, -52.5516, -10, 84.1667, 23.6464, -17.9078, -82.9485, -4.50337, -10.1255, 19.2182, 0.529756, 40.8041, -59.7223, 4.75985, 117.459, 25.1462, -9.12283, -85.8071, -10.1835, -12.0362, 6.23892, 6.44356, -12.886, -41.6591, -4.34411, 104.16, 11.157, -21.5328, 54.7192, -6.33703, -23.5676, 78.8722, 6.81506, -12.4344, 56.4202, 7.31833, 122, 0, -31.7374, 55.0131, -8.40788, -16.1941, 110.923, -1.27145, -0.925013, 46.4378, 4.52913, 109.068, 40.0308, -22.4699, 16.4636, -14.703, -22.9584, -18.0511, -0.93461, 122, -66.6408, -10, 109.223, trial: 0, score: 2390 trial: 1, score: 2461 trial: 2, score: 2429 trial: 3, score: 1406 trial: 4, score: 3903 Policy 1: 31.8542, -53.6808, -2.98051, -8.68181, -50.4884, 89.0061, 16.7267, 47.5042, -52.5516, -10, 84.1667, 23.6464, -17.9078, -82.9485, -4.50337, -10.1255, 19.2182, 0.529756, 40.8041, -59.7223, 4.75985, 117.459, 25.1462, -9.12283, -85.8071, -10.1835, -12.0362, 6.23892, 6.44356, -12.886, -41.6591, -4.34411, 104.16, 11.157, -21.5328, 54.7192, -6.33703, -23.5676, 78.8722, 6.81506, -12.4344, 56.4202, 7.31833, 122, 0, -31.7374, 55.0131, -8.40788, -16.1941, 110.923, -1.27145, -0.925013, 46.4378, 4.52913, 109.068, 40.0308, -22.4699, 16.4636, -14.703, -22.9584, -18.0511, -0.93461, 122, -66.6408, -10, 109.223, Average Score: 2517.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.5637, -56.4556, -3.87408, -9.45757, -51.6285, 87.4328, 13.5697, 52.67, -53.8953, -10, 85.8975, 23.4255, -20.0586, -87.564, -4.83826, -5.85193, 20.1465, 5.73228, 40.5426, -64.0214, 6.71061, 118.467, 25.5822, -2.09631, -83.3349, -8.67219, -9.36697, 5.91747, 5.59063, -9.67611, -36.9923, -5.97282, 109.098, 9.75496, -30.7527, 58.2917, -1.89737, -24.2504, 75.0825, 5.11566, -13.3081, 57.6186, 1.95634, 120.925, 6.40073, -34.5434, 50.8748, -7.32371, -14.5207, 105.416, -1.66886, -0.467066, 45.0383, 10.6951, 105.086, 39.0526, -16.2572, 9.65838, -11.1791, -24.5729, -15.9084, 5.95811, 117.544, -66.1263, -8.70748, 108.454, trial: 0, score: 1821 trial: 1, score: 4511 trial: 2, score: 1885 trial: 3, score: 4253 trial: 4, score: 3870 Policy 1: 27.5637, -56.4556, -3.87408, -9.45757, -51.6285, 87.4328, 13.5697, 52.67, -53.8953, -10, 85.8975, 23.4255, -20.0586, -87.564, -4.83826, -5.85193, 20.1465, 5.73228, 40.5426, -64.0214, 6.71061, 118.467, 25.5822, -2.09631, -83.3349, -8.67219, -9.36697, 5.91747, 5.59063, -9.67611, -36.9923, -5.97282, 109.098, 9.75496, -30.7527, 58.2917, -1.89737, -24.2504, 75.0825, 5.11566, -13.3081, 57.6186, 1.95634, 120.925, 6.40073, -34.5434, 50.8748, -7.32371, -14.5207, 105.416, -1.66886, -0.467066, 45.0383, 10.6951, 105.086, 39.0526, -16.2572, 9.65838, -11.1791, -24.5729, -15.9084, 5.95811, 117.544, -66.1263, -8.70748, 108.454, Average Score: 3268 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.4539, -50.6808, -3.14126, -8.84728, -51.7563, 90.3464, 16.6042, 54.3627, -53.865, -10, 79.4526, 33.389, -22.2521, -82.7768, -8.34881, -11.7565, 22.5869, 3.6769, 38.5448, -64.0799, 3.35796, 118.844, 25.502, -9.91248, -85.0478, -10.5322, -10.3854, 2.15509, 3.13178, -9.32691, -34.0194, -7.9048, 107.548, 14.7767, -23.3222, 54.2536, -3.42364, -19.2617, 75.7841, 6.21919, -13.9451, 58.1856, -2.05616, 122, 7.8038, -27.7622, 55.2245, -9.49197, -18.3359, 114.112, -4.25837, 5.06434, 48.1002, 10.0617, 107.031, 40.7701, -19.8666, 14.5143, -8.40657, -18.924, -18.4499, -1.69339, 119.739, -69.4239, -3.50894, 107.874, trial: 0, score: 4438 trial: 1, score: 3101 trial: 2, score: 3359 trial: 3, score: 3709 trial: 4, score: 2973 Policy 1: 26.4539, -50.6808, -3.14126, -8.84728, -51.7563, 90.3464, 16.6042, 54.3627, -53.865, -10, 79.4526, 33.389, -22.2521, -82.7768, -8.34881, -11.7565, 22.5869, 3.6769, 38.5448, -64.0799, 3.35796, 118.844, 25.502, -9.91248, -85.0478, -10.5322, -10.3854, 2.15509, 3.13178, -9.32691, -34.0194, -7.9048, 107.548, 14.7767, -23.3222, 54.2536, -3.42364, -19.2617, 75.7841, 6.21919, -13.9451, 58.1856, -2.05616, 122, 7.8038, -27.7622, 55.2245, -9.49197, -18.3359, 114.112, -4.25837, 5.06434, 48.1002, 10.0617, 107.031, 40.7701, -19.8666, 14.5143, -8.40657, -18.924, -18.4499, -1.69339, 119.739, -69.4239, -3.50894, 107.874, Average Score: 3516 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.5252, -55.0486, -9.12821, -1.72174, -49.6296, 88.2923, 17.3667, 53.8678, -56.2723, -6.06974, 84.7044, 33.0447, -19.2516, -84.2351, -5.70995, -8.74667, 15.2013, -1.58297, 32.1644, -57.6394, 6.34132, 119.2, 23.8214, -4.83378, -81.4336, -14.0794, -8.02478, 1.5603, 2.38757, -11.1512, -39.8403, -6.91681, 106.233, 11.2038, -25.7597, 51.4851, -9.69759, -22.9296, 75.8697, 1.19429, -13.0112, 60.6322, 1.08566, 120.473, 1.99418, -27.0346, 54.452, -11.8398, -21.2902, 113.047, -1.67935, 5.00803, 43.1938, 5.02069, 109.275, 35.9531, -14.4835, 16.2221, -12.5988, -20.024, -13.1446, 3.79329, 118.787, -63.4415, -7.88826, 102.49, trial: 0, score: 4413 trial: 1, score: 4285 trial: 2, score: 4190 trial: 3, score: 4477 trial: 4, score: 4414 Policy 1: 29.5252, -55.0486, -9.12821, -1.72174, -49.6296, 88.2923, 17.3667, 53.8678, -56.2723, -6.06974, 84.7044, 33.0447, -19.2516, -84.2351, -5.70995, -8.74667, 15.2013, -1.58297, 32.1644, -57.6394, 6.34132, 119.2, 23.8214, -4.83378, -81.4336, -14.0794, -8.02478, 1.5603, 2.38757, -11.1512, -39.8403, -6.91681, 106.233, 11.2038, -25.7597, 51.4851, -9.69759, -22.9296, 75.8697, 1.19429, -13.0112, 60.6322, 1.08566, 120.473, 1.99418, -27.0346, 54.452, -11.8398, -21.2902, 113.047, -1.67935, 5.00803, 43.1938, 5.02069, 109.275, 35.9531, -14.4835, 16.2221, -12.5988, -20.024, -13.1446, 3.79329, 118.787, -63.4415, -7.88826, 102.49, Average Score: 4355.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 24.9459, -50.6304, -5.06573, -9.37235, -48.7259, 88.78, 12.2157, 50.0309, -50.9494, -10, 78.269, 28.8111, -18.9551, -86.2239, -7.03491, -10.7803, 17.3519, 4.15356, 39.7757, -60.0316, 8.15331, 117.771, 21.0084, -8.76537, -81.4541, -9.85477, -12.5307, -0.793764, 5.75457, -9.42777, -35.6849, -8.30917, 109.549, 13.9126, -24.8855, 57.91, -5.10342, -16.5059, 78.6864, -0.794318, -9.69728, 62.8136, 6.42981, 121.594, 5.80406, -33.7873, 56.4163, -4.66015, -22.7282, 110.544, -4.47235, 1.3473, 52.6728, 13.0017, 109.435, 37.6968, -20.3936, 11.9309, -13.5584, -17.8147, -15.464, 2.36349, 121.611, -70.5128, -8.8546, 104.186, trial: 0, score: 3879 trial: 1, score: 2141 trial: 2, score: 2845 trial: 3, score: 2623 trial: 4, score: 3357 Policy 1: 24.9459, -50.6304, -5.06573, -9.37235, -48.7259, 88.78, 12.2157, 50.0309, -50.9494, -10, 78.269, 28.8111, -18.9551, -86.2239, -7.03491, -10.7803, 17.3519, 4.15356, 39.7757, -60.0316, 8.15331, 117.771, 21.0084, -8.76537, -81.4541, -9.85477, -12.5307, -0.793764, 5.75457, -9.42777, -35.6849, -8.30917, 109.549, 13.9126, -24.8855, 57.91, -5.10342, -16.5059, 78.6864, -0.794318, -9.69728, 62.8136, 6.42981, 121.594, 5.80406, -33.7873, 56.4163, -4.66015, -22.7282, 110.544, -4.47235, 1.3473, 52.6728, 13.0017, 109.435, 37.6968, -20.3936, 11.9309, -13.5584, -17.8147, -15.464, 2.36349, 121.611, -70.5128, -8.8546, 104.186, Average Score: 2969 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.3713, -52.8535, -4.26918, -4.79464, -55, 84.5561, 10.9124, 49.2122, -48.9396, -10, 76.7401, 31.0433, -26.1765, -80.1029, -3.60213, -6.79467, 20.0149, -0.49843, 32.2705, -56.506, 2.15554, 114.238, 25.5817, -6.3405, -84.9603, -4.78358, -8.62328, 5.57867, -1.95497, -7.40909, -34.5491, -9.79228, 105.455, 9.28498, -23.4379, 51.7964, -6.73582, -17.151, 70.7534, -0.12273, -13.7945, 61.084, 4.50924, 117.819, 1.27532, -31.9739, 55.4845, -9.92453, -19.8565, 112.147, -4.28727, 1.76637, 52.6517, 3.76891, 105.395, 34.0938, -13.0158, 15.6922, -7.60195, -24.0467, -17.5726, 1.53523, 120.257, -64.0507, -4.07955, 109.424, trial: 0, score: 3703 trial: 1, score: 2269 trial: 2, score: 2558 trial: 3, score: 3325 trial: 4, score: 4765 Policy 1: 31.3713, -52.8535, -4.26918, -4.79464, -55, 84.5561, 10.9124, 49.2122, -48.9396, -10, 76.7401, 31.0433, -26.1765, -80.1029, -3.60213, -6.79467, 20.0149, -0.49843, 32.2705, -56.506, 2.15554, 114.238, 25.5817, -6.3405, -84.9603, -4.78358, -8.62328, 5.57867, -1.95497, -7.40909, -34.5491, -9.79228, 105.455, 9.28498, -23.4379, 51.7964, -6.73582, -17.151, 70.7534, -0.12273, -13.7945, 61.084, 4.50924, 117.819, 1.27532, -31.9739, 55.4845, -9.92453, -19.8565, 112.147, -4.28727, 1.76637, 52.6517, 3.76891, 105.395, 34.0938, -13.0158, 15.6922, -7.60195, -24.0467, -17.5726, 1.53523, 120.257, -64.0507, -4.07955, 109.424, Average Score: 3324 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.4977, -48.5966, -0.982595, -6.83257, -54.2107, 88.0318, 12.3462, 55.2956, -47.7345, -6.62164, 77.6504, 26.4141, -21.7574, -81.6784, -4.12819, -5.76904, 14.4131, 4.25382, 34.7048, -57.6159, 11.5648, 117.865, 24.1625, -2.30567, -85.2627, -6.97096, -6.67405, -1.69897, 1.83362, -7.01633, -38.1901, -9.01404, 112.793, 12.4899, -27.6956, 55.7819, -11.668, -22.6305, 79.0045, -1.3029, -16.2608, 59.5568, -2.06099, 122, 3.48487, -32.0309, 58.9122, -10.7308, -17.0583, 113.811, -4.89785, 2.9113, 50.7519, 12.7253, 104.928, 38.4713, -15.709, 16.22, -9.55043, -23.403, -19.5519, -0.814796, 114.201, -67.6829, -5.73507, 105.933, trial: 0, score: 4253 trial: 1, score: 3229 trial: 2, score: 2973 trial: 3, score: 2813 trial: 4, score: 3104 Policy 1: 31.4977, -48.5966, -0.982595, -6.83257, -54.2107, 88.0318, 12.3462, 55.2956, -47.7345, -6.62164, 77.6504, 26.4141, -21.7574, -81.6784, -4.12819, -5.76904, 14.4131, 4.25382, 34.7048, -57.6159, 11.5648, 117.865, 24.1625, -2.30567, -85.2627, -6.97096, -6.67405, -1.69897, 1.83362, -7.01633, -38.1901, -9.01404, 112.793, 12.4899, -27.6956, 55.7819, -11.668, -22.6305, 79.0045, -1.3029, -16.2608, 59.5568, -2.06099, 122, 3.48487, -32.0309, 58.9122, -10.7308, -17.0583, 113.811, -4.89785, 2.9113, 50.7519, 12.7253, 104.928, 38.4713, -15.709, 16.22, -9.55043, -23.403, -19.5519, -0.814796, 114.201, -67.6829, -5.73507, 105.933, Average Score: 3274.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.6711, -48.6221, -3.74654, -4.08518, -52.516, 88.7096, 15.6419, 48.5117, -48.5811, -10, 81.3828, 26.9975, -23.2519, -86.4659, -6.0907, -5.59072, 16.8852, -2.40479, 39.8524, -63.7017, 3.90543, 113.842, 27.0143, -1.57008, -87.7237, -9.07306, -12.8947, 2.72176, 2.95902, -10.1365, -35.9385, -9.91635, 110.464, 17.6263, -27.3722, 58.2145, -11.6617, -17.5321, 79.584, 2.20676, -8.83789, 54.673, -0.0375116, 119.383, 7.17136, -32.0293, 51.1255, -9.91723, -22.8071, 113.488, -8.27958, -2.05784, 47.2941, 9.84286, 104.275, 38.2667, -13.8048, 11.882, -6.92568, -21.5798, -14.5901, 4.91431, 119.943, -69.1032, -10, 105.948, trial: 0, score: 2525 trial: 1, score: 3741 trial: 2, score: 1565 trial: 3, score: 4448 trial: 4, score: 4054 Policy 1: 27.6711, -48.6221, -3.74654, -4.08518, -52.516, 88.7096, 15.6419, 48.5117, -48.5811, -10, 81.3828, 26.9975, -23.2519, -86.4659, -6.0907, -5.59072, 16.8852, -2.40479, 39.8524, -63.7017, 3.90543, 113.842, 27.0143, -1.57008, -87.7237, -9.07306, -12.8947, 2.72176, 2.95902, -10.1365, -35.9385, -9.91635, 110.464, 17.6263, -27.3722, 58.2145, -11.6617, -17.5321, 79.584, 2.20676, -8.83789, 54.673, -0.0375116, 119.383, 7.17136, -32.0293, 51.1255, -9.91723, -22.8071, 113.488, -8.27958, -2.05784, 47.2941, 9.84286, 104.275, 38.2667, -13.8048, 11.882, -6.92568, -21.5798, -14.5901, 4.91431, 119.943, -69.1032, -10, 105.948, Average Score: 3266.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.3125, -50.4646, -0.580632, -0.331956, -53.076, 83.8417, 17.5262, 46.93, -52.1706, -10, 78.1189, 33.2145, -27.1986, -87.2718, -3.02115, -7.17431, 13.8042, 6.76896, 35.8995, -54.6496, 6.89153, 112.972, 26.4824, -7.91152, -80.4345, -13.6491, -9.01985, 2.89652, 7.11282, -14.6737, -40.7055, -5.81819, 113.371, 12.1687, -22.8132, 59.6916, -4.96178, -24.7896, 70.166, -0.653343, -16.6557, 57.9376, 3.36072, 119.72, 9.15153, -25.8724, 53.2313, -9.71203, -23.6744, 107.768, -3.04042, -3.16443, 43.1347, 12.3869, 103.584, 41.3097, -21.7918, 8.07258, -11.3743, -22.2853, -11.17, -0.594423, 121.317, -63.2399, -4.19998, 106.199, trial: 0, score: 4029 trial: 1, score: 4831 trial: 2, score: 3709 trial: 3, score: 2301 trial: 4, score: 3549 Policy 1: 27.3125, -50.4646, -0.580632, -0.331956, -53.076, 83.8417, 17.5262, 46.93, -52.1706, -10, 78.1189, 33.2145, -27.1986, -87.2718, -3.02115, -7.17431, 13.8042, 6.76896, 35.8995, -54.6496, 6.89153, 112.972, 26.4824, -7.91152, -80.4345, -13.6491, -9.01985, 2.89652, 7.11282, -14.6737, -40.7055, -5.81819, 113.371, 12.1687, -22.8132, 59.6916, -4.96178, -24.7896, 70.166, -0.653343, -16.6557, 57.9376, 3.36072, 119.72, 9.15153, -25.8724, 53.2313, -9.71203, -23.6744, 107.768, -3.04042, -3.16443, 43.1347, 12.3869, 103.584, 41.3097, -21.7918, 8.07258, -11.3743, -22.2853, -11.17, -0.594423, 121.317, -63.2399, -4.19998, 106.199, Average Score: 3683.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.6972, -48.5062, -6.79929, -1.02795, -48.5328, 87.975, 9.49904, 53.7761, -56.1909, -10, 81.7571, 27.9338, -19.158, -87.3451, -7.21457, -8.51511, 21.5792, -1.10458, 39.6266, -59.9815, 7.50996, 121.782, 27.0251, -5.81033, -80.29, -5.61305, -7.86918, 1.8443, 7.62096, -13.1223, -34.058, -6.68505, 106.868, 13.6089, -21.3382, 59.3363, -2.24592, -21.3802, 71.2878, 4.5602, -11.9503, 60.9817, -0.00635146, 122, 0.329056, -25.698, 53.8456, -10.1462, -23.3065, 104.506, -1.49863, -1.52074, 51.6954, 10.0162, 104.102, 35.7045, -16.7812, 12.8428, -13.1255, -26.6255, -15.346, -0.00156354, 113.055, -63.4096, -6.65673, 107.087, trial: 0, score: 4038 trial: 1, score: 4669 trial: 2, score: 4637 trial: 3, score: 4157 trial: 4, score: 4447 Policy 1: 23.6972, -48.5062, -6.79929, -1.02795, -48.5328, 87.975, 9.49904, 53.7761, -56.1909, -10, 81.7571, 27.9338, -19.158, -87.3451, -7.21457, -8.51511, 21.5792, -1.10458, 39.6266, -59.9815, 7.50996, 121.782, 27.0251, -5.81033, -80.29, -5.61305, -7.86918, 1.8443, 7.62096, -13.1223, -34.058, -6.68505, 106.868, 13.6089, -21.3382, 59.3363, -2.24592, -21.3802, 71.2878, 4.5602, -11.9503, 60.9817, -0.00635146, 122, 0.329056, -25.698, 53.8456, -10.1462, -23.3065, 104.506, -1.49863, -1.52074, 51.6954, 10.0162, 104.102, 35.7045, -16.7812, 12.8428, -13.1255, -26.6255, -15.346, -0.00156354, 113.055, -63.4096, -6.65673, 107.087, Average Score: 4389.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.789, -48.3615, -4.93557, -7.6556, -48.7034, 88.8822, 16.2095, 47.091, -46.4778, -10, 82.4922, 32.0545, -24.272, -85.7545, -4.16214, -12.0981, 20.9684, -2.15346, 35.0594, -63.6675, 2.2955, 119.116, 25.8185, -10.7627, -86.8699, -12.5517, -9.95294, 1.30914, 5.63166, -8.33773, -40.5398, -6.88694, 107.377, 12.8108, -23.7776, 55.7045, -4.8013, -20.701, 75.8323, 5.31438, -9.54781, 55.1781, 2.27255, 118.296, 9.19192, -33.493, 57.3993, -11.976, -18.0953, 112.799, -2.07018, 2.3665, 43.0754, 11.3928, 108.344, 41.2825, -16.9683, 9.45441, -11.1813, -27.1109, -12.3384, 1.50603, 115.868, -63.4798, -9.2399, 105.154, trial: 0, score: 3197 trial: 1, score: 3933 trial: 2, score: 4062 trial: 3, score: 3261 trial: 4, score: 1597 Policy 1: 30.789, -48.3615, -4.93557, -7.6556, -48.7034, 88.8822, 16.2095, 47.091, -46.4778, -10, 82.4922, 32.0545, -24.272, -85.7545, -4.16214, -12.0981, 20.9684, -2.15346, 35.0594, -63.6675, 2.2955, 119.116, 25.8185, -10.7627, -86.8699, -12.5517, -9.95294, 1.30914, 5.63166, -8.33773, -40.5398, -6.88694, 107.377, 12.8108, -23.7776, 55.7045, -4.8013, -20.701, 75.8323, 5.31438, -9.54781, 55.1781, 2.27255, 118.296, 9.19192, -33.493, 57.3993, -11.976, -18.0953, 112.799, -2.07018, 2.3665, 43.0754, 11.3928, 108.344, 41.2825, -16.9683, 9.45441, -11.1813, -27.1109, -12.3384, 1.50603, 115.868, -63.4798, -9.2399, 105.154, Average Score: 3210 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.0832, -55.4813, -5.17986, -7.43359, -54.6721, 85.7166, 17.2711, 53.2872, -47.0698, -10, 85.7751, 28.7341, -21.3235, -87.9158, -8.41903, -12.468, 21.6515, -1.7506, 39.4857, -62.6529, 2.97256, 116.035, 25.1693, -7.91328, -79.5619, -10.0338, -12.6611, 1.34517, -1.69765, -7.78791, -42.992, -2.91119, 108.796, 11.627, -23.7685, 58.4239, -7.04997, -20.5121, 78.3363, 4.97295, -13.8299, 62.7079, 6.01859, 122, 8.88114, -33.8357, 51.8945, -11.0899, -23.7298, 105.952, -2.98365, 4.41886, 45.9787, 12.1129, 110.521, 32.3768, -21.5348, 8.28043, -10.7487, -18.246, -17.3997, -0.395829, 114.142, -65.3768, -8.42531, 101.974, trial: 0, score: 1789 trial: 1, score: 3549 trial: 2, score: 2333 trial: 3, score: 3325 trial: 4, score: 2717 Policy 1: 25.0832, -55.4813, -5.17986, -7.43359, -54.6721, 85.7166, 17.2711, 53.2872, -47.0698, -10, 85.7751, 28.7341, -21.3235, -87.9158, -8.41903, -12.468, 21.6515, -1.7506, 39.4857, -62.6529, 2.97256, 116.035, 25.1693, -7.91328, -79.5619, -10.0338, -12.6611, 1.34517, -1.69765, -7.78791, -42.992, -2.91119, 108.796, 11.627, -23.7685, 58.4239, -7.04997, -20.5121, 78.3363, 4.97295, -13.8299, 62.7079, 6.01859, 122, 8.88114, -33.8357, 51.8945, -11.0899, -23.7298, 105.952, -2.98365, 4.41886, 45.9787, 12.1129, 110.521, 32.3768, -21.5348, 8.28043, -10.7487, -18.246, -17.3997, -0.395829, 114.142, -65.3768, -8.42531, 101.974, Average Score: 2742.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.7022, -54.793, -1.56704, -8.39636, -51.6152, 87.5487, 14.0897, 50.0036, -47.673, -7.04051, 81.7604, 26.0124, -20.0887, -81.1872, -7.45279, -13.6877, 18.0749, 3.46551, 41.1977, -64.222, 11.377, 121.769, 29.2622, -8.89932, -88, -7.70796, -10.0809, -2.51054, 5.8776, -13.8975, -34.8718, -3.1401, 113.335, 12.8177, -23.7735, 52.8335, -6.94028, -19.9521, 78.2885, 6.22958, -13.0529, 63.4241, 6.5793, 118.321, 5.36794, -32.0246, 58.3156, -2.95476, -23.8389, 108.431, -6.15198, -3.56536, 52.0719, 5.31293, 110.008, 41.9387, -17.9118, 13.4567, -14.4122, -21.1528, -17.7092, -1.75952, 113.728, -68.3864, -7.91291, 105.875, trial: 0, score: 1759 trial: 1, score: 2943 trial: 2, score: 1949 trial: 3, score: 2078 trial: 4, score: 2461 Policy 1: 29.7022, -54.793, -1.56704, -8.39636, -51.6152, 87.5487, 14.0897, 50.0036, -47.673, -7.04051, 81.7604, 26.0124, -20.0887, -81.1872, -7.45279, -13.6877, 18.0749, 3.46551, 41.1977, -64.222, 11.377, 121.769, 29.2622, -8.89932, -88, -7.70796, -10.0809, -2.51054, 5.8776, -13.8975, -34.8718, -3.1401, 113.335, 12.8177, -23.7735, 52.8335, -6.94028, -19.9521, 78.2885, 6.22958, -13.0529, 63.4241, 6.5793, 118.321, 5.36794, -32.0246, 58.3156, -2.95476, -23.8389, 108.431, -6.15198, -3.56536, 52.0719, 5.31293, 110.008, 41.9387, -17.9118, 13.4567, -14.4122, -21.1528, -17.7092, -1.75952, 113.728, -68.3864, -7.91291, 105.875, Average Score: 2238 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.8237, -57.8369, -3.03857, -9.65373, -46.4789, 82.9688, 9.80109, 49.8653, -54.4879, -6.33073, 81.6799, 25.6108, -24.3039, -86.0153, -6.44351, -9.97494, 21.0429, 6.21716, 39.6198, -63.2119, 6.63485, 113.91, 20.6726, -9.60692, -83.1093, -8.43506, -8.87721, -2.58269, 0.411094, -7.89884, -37.3525, -6.69913, 107.078, 9.46175, -23.1373, 59.4631, -4.69344, -18.3092, 79.1515, 2.13053, -17.004, 56.391, 3.72071, 120.947, 6.57992, -26.9646, 52.318, -9.8017, -16.3619, 113.396, -1.21747, 0.336217, 49.4207, 7.41737, 107.133, 35.4335, -19.6879, 15.294, -6.98857, -26.3482, -16.1609, 3.87996, 113.819, -68.2861, -10, 109.363, trial: 0, score: 3110 trial: 1, score: 3549 trial: 2, score: 3997 trial: 3, score: 3133 trial: 4, score: 1597 Policy 1: 30.8237, -57.8369, -3.03857, -9.65373, -46.4789, 82.9688, 9.80109, 49.8653, -54.4879, -6.33073, 81.6799, 25.6108, -24.3039, -86.0153, -6.44351, -9.97494, 21.0429, 6.21716, 39.6198, -63.2119, 6.63485, 113.91, 20.6726, -9.60692, -83.1093, -8.43506, -8.87721, -2.58269, 0.411094, -7.89884, -37.3525, -6.69913, 107.078, 9.46175, -23.1373, 59.4631, -4.69344, -18.3092, 79.1515, 2.13053, -17.004, 56.391, 3.72071, 120.947, 6.57992, -26.9646, 52.318, -9.8017, -16.3619, 113.396, -1.21747, 0.336217, 49.4207, 7.41737, 107.133, 35.4335, -19.6879, 15.294, -6.98857, -26.3482, -16.1609, 3.87996, 113.819, -68.2861, -10, 109.363, Average Score: 3077.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.6637, -56.8483, -4.48958, -3.63216, -55, 86.0886, 17.6873, 55.9493, -55.1912, -10, 83.6238, 25.1799, -24.9785, -87.231, -8.45477, -11.5255, 16.3105, -0.489693, 39.9805, -62.6556, 11.6827, 115.169, 25.8146, -5.58491, -86.26, -13.7622, -9.55529, 5.92442, 0.226484, -10.7029, -40.7559, -7.24081, 104.063, 12.5154, -28.5364, 52.6283, -2.95587, -16.3535, 79.7522, 6.72992, -18.0599, 53.6702, 3.54764, 122, 5.3267, -25.8315, 57.754, -9.31489, -15.295, 107.084, -7.05206, 6.22976, 52.6399, 11.8347, 109.135, 40.029, -15.4344, 13.5148, -12.4632, -21.5386, -11.5684, 7.92249, 114.996, -66.4857, -5.08113, 109.613, trial: 0, score: 3965 trial: 1, score: 1759 trial: 2, score: 1917 trial: 3, score: 3742 trial: 4, score: 2943 Policy 1: 29.6637, -56.8483, -4.48958, -3.63216, -55, 86.0886, 17.6873, 55.9493, -55.1912, -10, 83.6238, 25.1799, -24.9785, -87.231, -8.45477, -11.5255, 16.3105, -0.489693, 39.9805, -62.6556, 11.6827, 115.169, 25.8146, -5.58491, -86.26, -13.7622, -9.55529, 5.92442, 0.226484, -10.7029, -40.7559, -7.24081, 104.063, 12.5154, -28.5364, 52.6283, -2.95587, -16.3535, 79.7522, 6.72992, -18.0599, 53.6702, 3.54764, 122, 5.3267, -25.8315, 57.754, -9.31489, -15.295, 107.084, -7.05206, 6.22976, 52.6399, 11.8347, 109.135, 40.029, -15.4344, 13.5148, -12.4632, -21.5386, -11.5684, 7.92249, 114.996, -66.4857, -5.08113, 109.613, Average Score: 2865.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.8171, -54.4278, -5.33796, -0.471917, -55, 85.1877, 18.2272, 51.0287, -51.6225, -10, 85.6781, 27.4028, -18.3078, -87.1694, -7.40651, -9.88935, 18.9763, -1.88435, 36.675, -57.9557, 6.74437, 117.069, 26.7151, -5.38926, -87.7332, -10.4761, -9.28665, 5.94581, 1.07506, -9.72724, -40.2257, -4.11648, 104.882, 17.3902, -25.7198, 55.4989, -2.44607, -23.4931, 72.191, 5.46439, -12.7765, 58.2142, 0.649546, 119.086, 5.27621, -33.4768, 55.7009, -3.45458, -15.0241, 108.075, -4.36652, 0.961127, 45.3952, 6.81134, 104.387, 32.1813, -12.8223, 9.71028, -8.53279, -27.2218, -14.518, 6.29283, 114.717, -67.4106, -8.57176, 107.418, trial: 0, score: 3606 trial: 1, score: 4029 trial: 2, score: 3773 trial: 3, score: 3519 trial: 4, score: 3453 Policy 1: 31.8171, -54.4278, -5.33796, -0.471917, -55, 85.1877, 18.2272, 51.0287, -51.6225, -10, 85.6781, 27.4028, -18.3078, -87.1694, -7.40651, -9.88935, 18.9763, -1.88435, 36.675, -57.9557, 6.74437, 117.069, 26.7151, -5.38926, -87.7332, -10.4761, -9.28665, 5.94581, 1.07506, -9.72724, -40.2257, -4.11648, 104.882, 17.3902, -25.7198, 55.4989, -2.44607, -23.4931, 72.191, 5.46439, -12.7765, 58.2142, 0.649546, 119.086, 5.27621, -33.4768, 55.7009, -3.45458, -15.0241, 108.075, -4.36652, 0.961127, 45.3952, 6.81134, 104.387, 32.1813, -12.8223, 9.71028, -8.53279, -27.2218, -14.518, 6.29283, 114.717, -67.4106, -8.57176, 107.418, Average Score: 3676 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 28.6133, -55.8734, -2.61379, -6.8267, -50.7231, 89.6037, 10.7917, 47.8009, -55.0818, -10, 80.1102, 27.257, -25.736, -83.1151, -9.16516, -10.5328, 21.1638, -2.42358, 31.2874, -61.019, 5.95752, 119.214, 23.1478, -4.20171, -88, -10.5856, -7.9136, 0.46207, -0.0698712, -12.0646, -41.6118, -10, 106.322, 17.8388, -21.3284, 56.2426, -11.8155, -18.9023, 74.65, 1.12033, -17.3243, 58.1024, 4.46061, 122, 3.85206, -25.4278, 58.4946, -4.96443, -23.8067, 112.128, -6.13612, 2.67617, 47.1701, 8.14917, 105.595, 32.8321, -13.5581, 14.777, -13.1888, -23.7075, -19.0689, 5.35563, 118.387, -70.4063, -8.99238, 103.44, trial: 0, score: 4064 trial: 1, score: 3446 trial: 2, score: 3389 trial: 3, score: 4415 trial: 4, score: 3230 Policy 1: 28.6133, -55.8734, -2.61379, -6.8267, -50.7231, 89.6037, 10.7917, 47.8009, -55.0818, -10, 80.1102, 27.257, -25.736, -83.1151, -9.16516, -10.5328, 21.1638, -2.42358, 31.2874, -61.019, 5.95752, 119.214, 23.1478, -4.20171, -88, -10.5856, -7.9136, 0.46207, -0.0698712, -12.0646, -41.6118, -10, 106.322, 17.8388, -21.3284, 56.2426, -11.8155, -18.9023, 74.65, 1.12033, -17.3243, 58.1024, 4.46061, 122, 3.85206, -25.4278, 58.4946, -4.96443, -23.8067, 112.128, -6.13612, 2.67617, 47.1701, 8.14917, 105.595, 32.8321, -13.5581, 14.777, -13.1888, -23.7075, -19.0689, 5.35563, 118.387, -70.4063, -8.99238, 103.44, Average Score: 3708.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.6886, -48.2821, -5.70146, -1.76767, -52.4128, 81.4631, 17.8725, 56.2905, -55.1965, -10, 76.639, 31.8351, -20.9541, -78.8604, -4.43969, -6.1138, 16.7928, -1.87408, 35.2785, -55.9402, 7.18851, 112.692, 21.7549, -4.40642, -80.4577, -13.9441, -8.817, 0.766247, 0.785931, -10.2744, -34.6604, -2.67282, 111.613, 17.5523, -29.6224, 55.3809, -3.15068, -21.7547, 76.4387, 1.29685, -10.2566, 59.2039, 3.01846, 122, 8.9267, -31.4779, 49.7795, -10.4492, -17.6906, 105.676, -3.27634, -2.46354, 44.6163, 12.1592, 105.273, 32.7497, -18.9428, 13.3691, -7.14365, -24.4802, -11.3661, 6.82637, 121.606, -69.561, -10, 105.662, trial: 0, score: 4253 trial: 1, score: 4927 trial: 2, score: 4029 trial: 3, score: 3997 trial: 4, score: 1846 Policy 1: 27.6886, -48.2821, -5.70146, -1.76767, -52.4128, 81.4631, 17.8725, 56.2905, -55.1965, -10, 76.639, 31.8351, -20.9541, -78.8604, -4.43969, -6.1138, 16.7928, -1.87408, 35.2785, -55.9402, 7.18851, 112.692, 21.7549, -4.40642, -80.4577, -13.9441, -8.817, 0.766247, 0.785931, -10.2744, -34.6604, -2.67282, 111.613, 17.5523, -29.6224, 55.3809, -3.15068, -21.7547, 76.4387, 1.29685, -10.2566, 59.2039, 3.01846, 122, 8.9267, -31.4779, 49.7795, -10.4492, -17.6906, 105.676, -3.27634, -2.46354, 44.6163, 12.1592, 105.273, 32.7497, -18.9428, 13.3691, -7.14365, -24.4802, -11.3661, 6.82637, 121.606, -69.561, -10, 105.662, Average Score: 3810.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.9819, -56.8164, -6.36652, -1.02735, -50.4488, 86.7745, 10.3548, 48.4792, -52.2521, -7.08634, 78.9, 33.1977, -24.0166, -83.0385, -4.11032, -8.82498, 18.7983, 6.91741, 37.0972, -58.8839, 5.89787, 119.156, 25.7317, -1.61069, -87.0667, -13.0989, -10.6792, 0.0826791, 0.315361, -10.9591, -34.2422, -5.53391, 104.179, 8.20138, -28.9831, 54.8644, -9.7317, -19.5721, 74.1047, 6.70087, -15.4928, 61.9757, 7.0175, 120.409, 6.20726, -29.0624, 51.8298, -6.54473, -17.3213, 111.791, -3.64177, 4.87829, 51.8974, 8.24411, 110.074, 40.1472, -21.1202, 11.2951, -5.6227, -26.8929, -19.2958, 1.9788, 121.63, -65.289, -10, 104.412, trial: 0, score: 3133 trial: 1, score: 1565 trial: 2, score: 1309 trial: 3, score: 2744 trial: 4, score: 1750 Policy 1: 31.9819, -56.8164, -6.36652, -1.02735, -50.4488, 86.7745, 10.3548, 48.4792, -52.2521, -7.08634, 78.9, 33.1977, -24.0166, -83.0385, -4.11032, -8.82498, 18.7983, 6.91741, 37.0972, -58.8839, 5.89787, 119.156, 25.7317, -1.61069, -87.0667, -13.0989, -10.6792, 0.0826791, 0.315361, -10.9591, -34.2422, -5.53391, 104.179, 8.20138, -28.9831, 54.8644, -9.7317, -19.5721, 74.1047, 6.70087, -15.4928, 61.9757, 7.0175, 120.409, 6.20726, -29.0624, 51.8298, -6.54473, -17.3213, 111.791, -3.64177, 4.87829, 51.8974, 8.24411, 110.074, 40.1472, -21.1202, 11.2951, -5.6227, -26.8929, -19.2958, 1.9788, 121.63, -65.289, -10, 104.412, Average Score: 2100.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.5394, -50.8848, -0.17706, -0.87435, -55, 83.0424, 18.6563, 49.8389, -55.2753, -6.13134, 84.4413, 29.5648, -25.4704, -81.466, -8.77847, -5.8026, 15.8137, -0.47957, 40.053, -64.4492, 10.4255, 122, 21.1386, -7.36232, -88, -7.47184, -4.651, 3.47054, 3.37395, -14.5841, -34.6444, -9.25949, 112.106, 16.1877, -26.089, 54.7629, -5.10572, -17.1087, 72.5691, 4.81772, -13.9252, 59.9797, 5.85831, 122, 8.10191, -27.6457, 52.9622, -6.37903, -20.5283, 113.753, -3.49681, 2.93629, 47.1087, 4.85996, 106.058, 32.3273, -14.6463, 15.0433, -5.0559, -17.9726, -14.3518, 2.44801, 121.738, -69.1718, -3.48221, 102.345, trial: 0, score: 4343 trial: 1, score: 3517 trial: 2, score: 4029 trial: 3, score: 1597 trial: 4, score: 3325 Policy 1: 26.5394, -50.8848, -0.17706, -0.87435, -55, 83.0424, 18.6563, 49.8389, -55.2753, -6.13134, 84.4413, 29.5648, -25.4704, -81.466, -8.77847, -5.8026, 15.8137, -0.47957, 40.053, -64.4492, 10.4255, 122, 21.1386, -7.36232, -88, -7.47184, -4.651, 3.47054, 3.37395, -14.5841, -34.6444, -9.25949, 112.106, 16.1877, -26.089, 54.7629, -5.10572, -17.1087, 72.5691, 4.81772, -13.9252, 59.9797, 5.85831, 122, 8.10191, -27.6457, 52.9622, -6.37903, -20.5283, 113.753, -3.49681, 2.93629, 47.1087, 4.85996, 106.058, 32.3273, -14.6463, 15.0433, -5.0559, -17.9726, -14.3518, 2.44801, 121.738, -69.1718, -3.48221, 102.345, Average Score: 3362.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.6496, -48.3786, 0.429387, -1.20553, -48.9693, 91.058, 16.0894, 55.5758, -53.7713, -8.12757, 80.9351, 26.7962, -25.6021, -85.2078, -3.1772, -5.60653, 18.5833, -1.69922, 33.762, -62.0643, 10.5996, 120.379, 26.1496, -1.79548, -85.988, -7.98489, -8.53012, 0.685067, 0.426117, -10.145, -34.3784, -8.56426, 109.22, 8.65653, -21.4381, 54.2528, -10.5763, -15.299, 77.4904, -1.2215, -15.9854, 57.806, 6.44904, 121.894, 2.27672, -29.1383, 51.1203, -10.8597, -18.8063, 104.84, -1.50967, -0.104183, 52.0697, 10.6917, 109.465, 38.433, -14.9109, 12.0529, -12.5995, -21.9996, -17.7198, 3.39459, 113.144, -62.9154, -10, 105.913, trial: 0, score: 3037 trial: 1, score: 3005 trial: 2, score: 3517 trial: 3, score: 2621 trial: 4, score: 4093 Policy 1: 22.6496, -48.3786, 0.429387, -1.20553, -48.9693, 91.058, 16.0894, 55.5758, -53.7713, -8.12757, 80.9351, 26.7962, -25.6021, -85.2078, -3.1772, -5.60653, 18.5833, -1.69922, 33.762, -62.0643, 10.5996, 120.379, 26.1496, -1.79548, -85.988, -7.98489, -8.53012, 0.685067, 0.426117, -10.145, -34.3784, -8.56426, 109.22, 8.65653, -21.4381, 54.2528, -10.5763, -15.299, 77.4904, -1.2215, -15.9854, 57.806, 6.44904, 121.894, 2.27672, -29.1383, 51.1203, -10.8597, -18.8063, 104.84, -1.50967, -0.104183, 52.0697, 10.6917, 109.465, 38.433, -14.9109, 12.0529, -12.5995, -21.9996, -17.7198, 3.39459, 113.144, -62.9154, -10, 105.913, Average Score: 3254.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.7932, -51.2015, -4.20233, -7.5467, -53.6744, 89.5618, 10.184, 54.9508, -51.3458, -9.13583, 84.327, 27.2593, -19.2715, -87.7544, -9.86667, -13.9998, 15.6793, 2.16803, 34.5385, -62.9009, 3.24208, 121.165, 24.3309, -4.47109, -79.7111, -12.092, -13.1623, 4.26195, 4.44397, -11.3781, -38.4407, -7.25598, 108.16, 15.1812, -25.0672, 58.2328, -5.98338, -18.7146, 70.6071, 4.27607, -13.6623, 53.9009, 1.44011, 122, 8.17777, -28.1661, 54.1842, -7.24008, -15.5273, 112.636, -0.777589, -0.684568, 43.9459, 5.20599, 101.539, 34.2596, -18.9589, 8.45115, -11.4616, -20.5857, -11.5343, -0.384049, 114.683, -67.2928, -4.45612, 101.874, trial: 0, score: 4285 trial: 1, score: 3997 trial: 2, score: 4125 trial: 3, score: 3773 trial: 4, score: 3933 Policy 1: 26.7932, -51.2015, -4.20233, -7.5467, -53.6744, 89.5618, 10.184, 54.9508, -51.3458, -9.13583, 84.327, 27.2593, -19.2715, -87.7544, -9.86667, -13.9998, 15.6793, 2.16803, 34.5385, -62.9009, 3.24208, 121.165, 24.3309, -4.47109, -79.7111, -12.092, -13.1623, 4.26195, 4.44397, -11.3781, -38.4407, -7.25598, 108.16, 15.1812, -25.0672, 58.2328, -5.98338, -18.7146, 70.6071, 4.27607, -13.6623, 53.9009, 1.44011, 122, 8.17777, -28.1661, 54.1842, -7.24008, -15.5273, 112.636, -0.777589, -0.684568, 43.9459, 5.20599, 101.539, 34.2596, -18.9589, 8.45115, -11.4616, -20.5857, -11.5343, -0.384049, 114.683, -67.2928, -4.45612, 101.874, Average Score: 4022.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 24.758, -48.1708, -2.58181, -0.793096, -55, 87.52, 10.038, 55.6161, -56.1083, -10, 82.5816, 25.5226, -19.9412, -85.7343, -5.56306, -12.9581, 17.2817, 1.60304, 37.9604, -64.2644, 2.50295, 119.066, 28.1001, -3.49748, -88, -8.57398, -5.16933, 4.83538, -0.767596, -13.3697, -40.6253, -1.56036, 105.108, 17.1611, -23.3443, 53.1644, -5.14747, -22.4637, 76.3105, -2.20583, -16.7505, 56.3631, 0.173976, 119.909, 7.29652, -33.3373, 53.3302, -3.83076, -23.3007, 111.464, -2.12861, -3.49843, 52.7252, 7.53119, 107.902, 39.866, -20.3402, 16.4257, -9.60134, -25.0947, -19.1238, 6.75051, 120.009, -65.1591, -7.02444, 106.206, trial: 0, score: 4381 trial: 1, score: 4573 trial: 2, score: 2781 trial: 3, score: 4573 trial: 4, score: 2813 Policy 1: 24.758, -48.1708, -2.58181, -0.793096, -55, 87.52, 10.038, 55.6161, -56.1083, -10, 82.5816, 25.5226, -19.9412, -85.7343, -5.56306, -12.9581, 17.2817, 1.60304, 37.9604, -64.2644, 2.50295, 119.066, 28.1001, -3.49748, -88, -8.57398, -5.16933, 4.83538, -0.767596, -13.3697, -40.6253, -1.56036, 105.108, 17.1611, -23.3443, 53.1644, -5.14747, -22.4637, 76.3105, -2.20583, -16.7505, 56.3631, 0.173976, 119.909, 7.29652, -33.3373, 53.3302, -3.83076, -23.3007, 111.464, -2.12861, -3.49843, 52.7252, 7.53119, 107.902, 39.866, -20.3402, 16.4257, -9.60134, -25.0947, -19.1238, 6.75051, 120.009, -65.1591, -7.02444, 106.206, Average Score: 3824.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.8459, -51.7848, -8.72018, -2.52123, -48.1202, 82.2383, 14.0874, 53.7505, -50.9963, -6.00893, 78.5827, 29.9534, -25.156, -79.1241, -11.5965, -10.5236, 17.5457, 6.58311, 38.8311, -56.5938, 2.99929, 119.755, 29.0809, -5.14312, -85.4501, -12.7072, -5.65151, 2.91743, 6.53033, -6.59724, -42.5134, -4.47894, 106.848, 12.1591, -24.088, 54.0549, -2.89286, -17.2979, 77.6374, 4.01266, -14.2591, 62.1636, 2.45993, 117.834, 3.5408, -28.2818, 52.51, -8.94255, -23.9459, 110.148, -7.14485, -2.08999, 52.4324, 11.4337, 104.209, 37.2231, -18.8283, 8.16556, -6.69942, -22.168, -18.2845, 6.04254, 118.822, -69.2984, -9.24976, 108.388, trial: 0, score: 4223 trial: 1, score: 3293 trial: 2, score: 3807 trial: 3, score: 4029 trial: 4, score: 4285 Policy 1: 27.8459, -51.7848, -8.72018, -2.52123, -48.1202, 82.2383, 14.0874, 53.7505, -50.9963, -6.00893, 78.5827, 29.9534, -25.156, -79.1241, -11.5965, -10.5236, 17.5457, 6.58311, 38.8311, -56.5938, 2.99929, 119.755, 29.0809, -5.14312, -85.4501, -12.7072, -5.65151, 2.91743, 6.53033, -6.59724, -42.5134, -4.47894, 106.848, 12.1591, -24.088, 54.0549, -2.89286, -17.2979, 77.6374, 4.01266, -14.2591, 62.1636, 2.45993, 117.834, 3.5408, -28.2818, 52.51, -8.94255, -23.9459, 110.148, -7.14485, -2.08999, 52.4324, 11.4337, 104.209, 37.2231, -18.8283, 8.16556, -6.69942, -22.168, -18.2845, 6.04254, 118.822, -69.2984, -9.24976, 108.388, Average Score: 3927.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.7193, -57.5197, -4.11591, -5.49988, -55, 90.9463, 12.5322, 49.3166, -48.2616, -10, 86.1249, 24.0442, -25.2292, -81.534, -3.25427, -9.64445, 19.8838, -0.77991, 34.6961, -58.976, 10.803, 113.624, 26.6446, -10.9966, -80.5445, -14.0148, -13.022, 3.14073, 2.30859, -13.8019, -43.0312, -2.71668, 109.973, 9.34458, -30.6359, 60.5894, -3.31321, -20.8892, 73.6453, 6.27208, -14.556, 61.6391, 0.70501, 118.148, 2.5987, -32.0326, 54.318, -7.76097, -23.6284, 104.35, -2.74325, 0.99944, 51.6948, 13.0282, 101.524, 33.5084, -18.2856, 9.53557, -4.95552, -23.6942, -18.6982, -1.7443, 121.971, -70.0067, -10, 107.354, trial: 0, score: 3293 trial: 1, score: 3101 trial: 2, score: 4479 trial: 3, score: 3574 trial: 4, score: 3005 Policy 1: 26.7193, -57.5197, -4.11591, -5.49988, -55, 90.9463, 12.5322, 49.3166, -48.2616, -10, 86.1249, 24.0442, -25.2292, -81.534, -3.25427, -9.64445, 19.8838, -0.77991, 34.6961, -58.976, 10.803, 113.624, 26.6446, -10.9966, -80.5445, -14.0148, -13.022, 3.14073, 2.30859, -13.8019, -43.0312, -2.71668, 109.973, 9.34458, -30.6359, 60.5894, -3.31321, -20.8892, 73.6453, 6.27208, -14.556, 61.6391, 0.70501, 118.148, 2.5987, -32.0326, 54.318, -7.76097, -23.6284, 104.35, -2.74325, 0.99944, 51.6948, 13.0282, 101.524, 33.5084, -18.2856, 9.53557, -4.95552, -23.6942, -18.6982, -1.7443, 121.971, -70.0067, -10, 107.354, Average Score: 3490.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.869, -54.8235, -6.76313, -6.186, -47.0782, 87.4206, 11.0178, 56.4304, -55.0337, -10, 82.7882, 24.3079, -22.613, -84.8432, -5.41392, -5.1892, 18.444, 3.29417, 40.3895, -61.9119, 7.69019, 115.265, 28.2757, -6.38379, -86.0206, -5.59375, -4.86429, -2.43028, 5.42552, -6.23701, -36.5023, -3.16772, 112.935, 15.6365, -30.0851, 59.9646, -6.22499, -17.679, 75.8022, 1.41662, -10.0509, 57.5723, 4.81869, 120.388, 8.41913, -33.6942, 51.6918, -9.60389, -16.8703, 111.882, -4.63182, -2.21696, 50.2299, 11.5694, 103.095, 36.1458, -21.0303, 11.4319, -5.59758, -24.2793, -15.6221, 5.92158, 116.16, -72.2866, -9.19508, 104.706, trial: 0, score: 3037 trial: 1, score: 3767 trial: 2, score: 3453 trial: 3, score: 1693 trial: 4, score: 3293 Policy 1: 25.869, -54.8235, -6.76313, -6.186, -47.0782, 87.4206, 11.0178, 56.4304, -55.0337, -10, 82.7882, 24.3079, -22.613, -84.8432, -5.41392, -5.1892, 18.444, 3.29417, 40.3895, -61.9119, 7.69019, 115.265, 28.2757, -6.38379, -86.0206, -5.59375, -4.86429, -2.43028, 5.42552, -6.23701, -36.5023, -3.16772, 112.935, 15.6365, -30.0851, 59.9646, -6.22499, -17.679, 75.8022, 1.41662, -10.0509, 57.5723, 4.81869, 120.388, 8.41913, -33.6942, 51.6918, -9.60389, -16.8703, 111.882, -4.63182, -2.21696, 50.2299, 11.5694, 103.095, 36.1458, -21.0303, 11.4319, -5.59758, -24.2793, -15.6221, 5.92158, 116.16, -72.2866, -9.19508, 104.706, Average Score: 3048.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.4507, -55.5794, -4.64009, -0.371283, -52.7365, 88.6947, 9.22387, 47.8369, -54.7116, -7.6868, 77.1629, 27.6789, -22.2994, -85.2074, -2.42626, -6.54034, 14.0554, -0.272562, 36.1369, -54.5639, 11.0821, 114.661, 28.0105, -5.22388, -81.3346, -7.01127, -7.17771, 1.85212, 7.60186, -14.1083, -34.6925, -10, 110.616, 17.6556, -23.306, 60.7336, -3.60387, -23.6173, 75.6868, 4.86215, -12.4846, 55.8597, 3.7172, 117.678, 4.44677, -28.5209, 52.7023, -2.13224, -21.8529, 108.436, -8.93367, 4.20277, 44.2601, 9.47121, 106.236, 35.8617, -20.244, 11.3637, -10.9132, -22.524, -18.093, -1.90488, 113.81, -64.9836, -8.93216, 107.516, trial: 0, score: 1631 trial: 1, score: 3613 trial: 2, score: 2687 trial: 3, score: 3645 trial: 4, score: 1910 Policy 1: 27.4507, -55.5794, -4.64009, -0.371283, -52.7365, 88.6947, 9.22387, 47.8369, -54.7116, -7.6868, 77.1629, 27.6789, -22.2994, -85.2074, -2.42626, -6.54034, 14.0554, -0.272562, 36.1369, -54.5639, 11.0821, 114.661, 28.0105, -5.22388, -81.3346, -7.01127, -7.17771, 1.85212, 7.60186, -14.1083, -34.6925, -10, 110.616, 17.6556, -23.306, 60.7336, -3.60387, -23.6173, 75.6868, 4.86215, -12.4846, 55.8597, 3.7172, 117.678, 4.44677, -28.5209, 52.7023, -2.13224, -21.8529, 108.436, -8.93367, 4.20277, 44.2601, 9.47121, 106.236, 35.8617, -20.244, 11.3637, -10.9132, -22.524, -18.093, -1.90488, 113.81, -64.9836, -8.93216, 107.516, Average Score: 2697.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 28.8597, -48.347, -4.94812, -4.60832, -50.6123, 90.2569, 10.2177, 55.9964, -51.3251, -10, 86.1027, 28.6318, -20.4998, -81.9787, -4.41019, -7.48668, 22.707, 0.715009, 36.0857, -55.3547, 9.38526, 113.734, 24.5515, -5.70455, -85.711, -13.5291, -9.86459, 0.534909, 0.473562, -6.90412, -41.8482, -7.44379, 108.562, 11.4349, -25.7387, 57.2443, -9.65314, -24.045, 75.2702, 5.15499, -9.71851, 55.2457, 3.01655, 122, 3.85879, -34.9167, 59.329, -4.70438, -18.6242, 112.04, -8.27961, 2.82643, 52.7341, 8.11628, 105.158, 32.4608, -14.0453, 8.1889, -10.8769, -25.8207, -14.6034, -0.00809751, 119.388, -69.1928, -10, 109.919, trial: 0, score: 1782 trial: 1, score: 2079 trial: 2, score: 2077 trial: 3, score: 2144 trial: 4, score: 2198 Policy 1: 28.8597, -48.347, -4.94812, -4.60832, -50.6123, 90.2569, 10.2177, 55.9964, -51.3251, -10, 86.1027, 28.6318, -20.4998, -81.9787, -4.41019, -7.48668, 22.707, 0.715009, 36.0857, -55.3547, 9.38526, 113.734, 24.5515, -5.70455, -85.711, -13.5291, -9.86459, 0.534909, 0.473562, -6.90412, -41.8482, -7.44379, 108.562, 11.4349, -25.7387, 57.2443, -9.65314, -24.045, 75.2702, 5.15499, -9.71851, 55.2457, 3.01655, 122, 3.85879, -34.9167, 59.329, -4.70438, -18.6242, 112.04, -8.27961, 2.82643, 52.7341, 8.11628, 105.158, 32.4608, -14.0453, 8.1889, -10.8769, -25.8207, -14.6034, -0.00809751, 119.388, -69.1928, -10, 109.919, Average Score: 2056 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.9371, -50.7967, -4.23842, -0.223527, -46.6331, 84.3075, 14.0785, 54.7592, -50.361, -9.8587, 78.886, 26.9138, -24.4176, -87.2994, -7.30951, -5.12059, 19.955, 4.8028, 33.1668, -56.4893, 2.20103, 117.282, 27.221, -10.5139, -80.2496, -8.926, -10.2045, -3.17847, 0.708513, -10.8729, -42.6625, -7.18525, 108.097, 8.76766, -30.3095, 56.417, -8.16847, -22.7049, 70.8361, -1.20235, -9.55276, 59.0472, 1.64011, 118.349, 9.62118, -26.9099, 54.5945, -11.9196, -23.8075, 108.314, -9.46396, 3.06086, 45.7808, 12.507, 107.212, 41.9798, -18.0757, 8.61757, -10.8246, -19.073, -14.0011, -0.905083, 118.273, -63.6063, -3.65269, 105.228, trial: 0, score: 2367 trial: 1, score: 2205 trial: 2, score: 1949 trial: 3, score: 2207 trial: 4, score: 2109 Policy 1: 22.9371, -50.7967, -4.23842, -0.223527, -46.6331, 84.3075, 14.0785, 54.7592, -50.361, -9.8587, 78.886, 26.9138, -24.4176, -87.2994, -7.30951, -5.12059, 19.955, 4.8028, 33.1668, -56.4893, 2.20103, 117.282, 27.221, -10.5139, -80.2496, -8.926, -10.2045, -3.17847, 0.708513, -10.8729, -42.6625, -7.18525, 108.097, 8.76766, -30.3095, 56.417, -8.16847, -22.7049, 70.8361, -1.20235, -9.55276, 59.0472, 1.64011, 118.349, 9.62118, -26.9099, 54.5945, -11.9196, -23.8075, 108.314, -9.46396, 3.06086, 45.7808, 12.507, 107.212, 41.9798, -18.0757, 8.61757, -10.8246, -19.073, -14.0011, -0.905083, 118.273, -63.6063, -3.65269, 105.228, Average Score: 2167.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.7856, -56.2994, -2.78892, -1.34685, -51.9268, 85.7015, 16.5245, 51.8834, -48.2955, -5.68196, 81.9152, 30.5233, -22.8999, -84.3777, -10.1769, -9.20717, 22.6129, 3.44856, 38.3679, -63.3016, 3.80428, 118.226, 24.2476, -8.80081, -86.9789, -5.41743, -7.98055, 1.51387, 1.66475, -12.082, -40.7861, -10, 111.395, 13.9939, -22.746, 51.3424, -4.64609, -20.9747, 72.0164, -0.768603, -11.7423, 54.8255, 5.27548, 122, 2.75754, -25.793, 58.861, -3.27671, -19.3982, 107.642, -2.83792, 0.815969, 49.0078, 9.96735, 107.732, 33.2948, -17.9891, 13.0939, -14.3936, -21.0933, -15.6386, 3.93278, 114.847, -63.537, -8.44729, 101.376, trial: 0, score: 3773 trial: 1, score: 3197 trial: 2, score: 4030 trial: 3, score: 3254 trial: 4, score: 4927 Policy 1: 29.7856, -56.2994, -2.78892, -1.34685, -51.9268, 85.7015, 16.5245, 51.8834, -48.2955, -5.68196, 81.9152, 30.5233, -22.8999, -84.3777, -10.1769, -9.20717, 22.6129, 3.44856, 38.3679, -63.3016, 3.80428, 118.226, 24.2476, -8.80081, -86.9789, -5.41743, -7.98055, 1.51387, 1.66475, -12.082, -40.7861, -10, 111.395, 13.9939, -22.746, 51.3424, -4.64609, -20.9747, 72.0164, -0.768603, -11.7423, 54.8255, 5.27548, 122, 2.75754, -25.793, 58.861, -3.27671, -19.3982, 107.642, -2.83792, 0.815969, 49.0078, 9.96735, 107.732, 33.2948, -17.9891, 13.0939, -14.3936, -21.0933, -15.6386, 3.93278, 114.847, -63.537, -8.44729, 101.376, Average Score: 3836.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.2, -55.6813, -4.95238, -6.46783, -46.9071, 90.8264, 16.4687, 55.8911, -54.3537, -8.13675, 81.6508, 23.562, -21.1247, -81.692, -8.69402, -9.75932, 19.9237, -0.311419, 36.8909, -62.149, 8.30314, 115.467, 24.5934, -7.46593, -80.1457, -10.0938, -11.9817, 2.27084, 1.25205, -7.22045, -41.2081, -10, 109.398, 9.04654, -29.6328, 52.2483, -10.2828, -23.7642, 79.6881, 1.68706, -12.3805, 55.5677, 3.23, 122, 2.71178, -31.0507, 55.2303, -10.5778, -21.3027, 107.822, -1.74504, -1.82039, 44.8702, 11.5462, 101.983, 41.2162, -22.4127, 12.6391, -6.82614, -24.3991, -13.1198, 2.48028, 116.492, -70.7368, -8.72097, 107.806, trial: 0, score: 3421 trial: 1, score: 3229 trial: 2, score: 3575 trial: 3, score: 2781 trial: 4, score: 1565 Policy 1: 31.2, -55.6813, -4.95238, -6.46783, -46.9071, 90.8264, 16.4687, 55.8911, -54.3537, -8.13675, 81.6508, 23.562, -21.1247, -81.692, -8.69402, -9.75932, 19.9237, -0.311419, 36.8909, -62.149, 8.30314, 115.467, 24.5934, -7.46593, -80.1457, -10.0938, -11.9817, 2.27084, 1.25205, -7.22045, -41.2081, -10, 109.398, 9.04654, -29.6328, 52.2483, -10.2828, -23.7642, 79.6881, 1.68706, -12.3805, 55.5677, 3.23, 122, 2.71178, -31.0507, 55.2303, -10.5778, -21.3027, 107.822, -1.74504, -1.82039, 44.8702, 11.5462, 101.983, 41.2162, -22.4127, 12.6391, -6.82614, -24.3991, -13.1198, 2.48028, 116.492, -70.7368, -8.72097, 107.806, Average Score: 2914.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.3611, -56.9218, -3.1844, -1.09965, -48.1112, 84.4545, 18.7872, 52.7398, -50.7617, -6.89562, 77.0787, 23.9609, -25.7295, -81.7264, -3.56545, -9.25165, 17.025, -2.51186, 31.5143, -59.0378, 10.5398, 116.284, 23.6278, -10.8877, -79.5936, -6.35013, -9.85046, 5.72538, 3.66749, -9.92814, -40.8759, -4.69286, 105.801, 9.11017, -29.4975, 57.3187, -8.16429, -21.5714, 75.9922, 1.6405, -17.2418, 62.9201, -0.740854, 122, 8.55602, -25.9762, 55.5252, -9.55727, -22.0692, 109.107, -3.86712, -0.987952, 47.6101, 7.42318, 110.253, 34.6927, -18.6556, 13.8233, -9.00104, -23.5293, -20.1396, 4.48111, 114.922, -67.6844, -10, 109.391, trial: 0, score: 1885 trial: 1, score: 3221 trial: 2, score: 3583 trial: 3, score: 2366 trial: 4, score: 1854 Policy 1: 26.3611, -56.9218, -3.1844, -1.09965, -48.1112, 84.4545, 18.7872, 52.7398, -50.7617, -6.89562, 77.0787, 23.9609, -25.7295, -81.7264, -3.56545, -9.25165, 17.025, -2.51186, 31.5143, -59.0378, 10.5398, 116.284, 23.6278, -10.8877, -79.5936, -6.35013, -9.85046, 5.72538, 3.66749, -9.92814, -40.8759, -4.69286, 105.801, 9.11017, -29.4975, 57.3187, -8.16429, -21.5714, 75.9922, 1.6405, -17.2418, 62.9201, -0.740854, 122, 8.55602, -25.9762, 55.5252, -9.55727, -22.0692, 109.107, -3.86712, -0.987952, 47.6101, 7.42318, 110.253, 34.6927, -18.6556, 13.8233, -9.00104, -23.5293, -20.1396, 4.48111, 114.922, -67.6844, -10, 109.391, Average Score: 2581.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.9668, -52.0139, -1.36278, -9.12805, -50.2748, 84.2961, 11.7209, 53.9785, -50.5837, -5.33823, 81.6693, 33.0763, -22.9182, -85.7047, -2.58471, -12.2736, 13.8192, -2.55656, 38.3733, -64.3738, 8.13868, 121.87, 22.3126, -2.382, -88, -5.87211, -12.4494, -1.59069, -0.378478, -14.8797, -42.4577, -3.5205, 106.402, 10.303, -26.7017, 60.7698, -8.2073, -20.1315, 70.3755, 2.50373, -8.8224, 63.0483, 0.997034, 122, 3.20329, -28.3559, 56.0646, -7.46595, -22.9609, 112.07, -2.00162, 1.27926, 48.3955, 7.14064, 110.447, 34.8117, -18.2846, 11.1287, -14.733, -24.5182, -11.6083, 4.4376, 121.93, -64.6774, -10, 108.505, trial: 0, score: 4567 trial: 1, score: 3645 trial: 2, score: 2718 trial: 3, score: 2230 trial: 4, score: 2431 Policy 1: 25.9668, -52.0139, -1.36278, -9.12805, -50.2748, 84.2961, 11.7209, 53.9785, -50.5837, -5.33823, 81.6693, 33.0763, -22.9182, -85.7047, -2.58471, -12.2736, 13.8192, -2.55656, 38.3733, -64.3738, 8.13868, 121.87, 22.3126, -2.382, -88, -5.87211, -12.4494, -1.59069, -0.378478, -14.8797, -42.4577, -3.5205, 106.402, 10.303, -26.7017, 60.7698, -8.2073, -20.1315, 70.3755, 2.50373, -8.8224, 63.0483, 0.997034, 122, 3.20329, -28.3559, 56.0646, -7.46595, -22.9609, 112.07, -2.00162, 1.27926, 48.3955, 7.14064, 110.447, 34.8117, -18.2846, 11.1287, -14.733, -24.5182, -11.6083, 4.4376, 121.93, -64.6774, -10, 108.505, Average Score: 3118.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.4225, -53.9785, -4.15182, -1.82742, -49.81, 85.0864, 14.3575, 56.6563, -56.1484, -10, 79.5158, 31.0912, -26.1162, -79.5893, -3.86255, -4.96484, 19.045, 2.37838, 39.3436, -60.5153, 8.83096, 117.905, 22.7348, -1.79412, -87.9843, -4.79165, -4.85338, 6.6713, -2.22541, -14.2497, -39.3558, -2.51747, 110.141, 8.08595, -25.6311, 53.0927, -9.33286, -22.852, 78.2498, 6.03047, -8.45375, 62.6533, 5.79208, 121.53, 0, -31.4675, 56.2701, -3.88328, -23.4873, 107.834, -2.46024, -2.67035, 49.6263, 8.43194, 103.712, 39.9104, -17.7612, 14.4017, -5.49714, -26.8384, -15.9335, 2.79347, 122, -69.3725, -6.52128, 101.166, trial: 0, score: 2325 trial: 1, score: 2879 trial: 2, score: 2271 trial: 3, score: 2621 trial: 4, score: 4797 Policy 1: 31.4225, -53.9785, -4.15182, -1.82742, -49.81, 85.0864, 14.3575, 56.6563, -56.1484, -10, 79.5158, 31.0912, -26.1162, -79.5893, -3.86255, -4.96484, 19.045, 2.37838, 39.3436, -60.5153, 8.83096, 117.905, 22.7348, -1.79412, -87.9843, -4.79165, -4.85338, 6.6713, -2.22541, -14.2497, -39.3558, -2.51747, 110.141, 8.08595, -25.6311, 53.0927, -9.33286, -22.852, 78.2498, 6.03047, -8.45375, 62.6533, 5.79208, 121.53, 0, -31.4675, 56.2701, -3.88328, -23.4873, 107.834, -2.46024, -2.67035, 49.6263, 8.43194, 103.712, 39.9104, -17.7612, 14.4017, -5.49714, -26.8384, -15.9335, 2.79347, 122, -69.3725, -6.52128, 101.166, Average Score: 2978.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.7119, -50.8041, -0.51351, -4.11862, -53.1014, 87.442, 13.0332, 56.8134, -50.4577, -10, 78.672, 23.6973, -24.2595, -83.7487, -2.63064, -8.4172, 20.5234, 4.49147, 34.9943, -62.5481, 2.33859, 119.594, 22.807, -6.063, -81.7674, -12.0181, -11.9321, -0.394796, 5.20899, -13.0244, -41.4405, -10, 111.945, 14.5658, -23.7521, 54.5389, -2.58061, -18.868, 70.6217, 3.69727, -8.4857, 58.4343, -1.23174, 122, 3.88802, -30.8668, 52.2085, -5.65121, -22.4933, 107.332, -8.46701, -1.19788, 51.5384, 4.30101, 104.111, 34.0261, -20.3438, 9.98515, -13.1339, -19.5436, -19.6785, 0.618273, 114.812, -66.4859, -10, 101.148, trial: 0, score: 3965 trial: 1, score: 3453 trial: 2, score: 3005 trial: 3, score: 3742 trial: 4, score: 4541 Policy 1: 29.7119, -50.8041, -0.51351, -4.11862, -53.1014, 87.442, 13.0332, 56.8134, -50.4577, -10, 78.672, 23.6973, -24.2595, -83.7487, -2.63064, -8.4172, 20.5234, 4.49147, 34.9943, -62.5481, 2.33859, 119.594, 22.807, -6.063, -81.7674, -12.0181, -11.9321, -0.394796, 5.20899, -13.0244, -41.4405, -10, 111.945, 14.5658, -23.7521, 54.5389, -2.58061, -18.868, 70.6217, 3.69727, -8.4857, 58.4343, -1.23174, 122, 3.88802, -30.8668, 52.2085, -5.65121, -22.4933, 107.332, -8.46701, -1.19788, 51.5384, 4.30101, 104.111, 34.0261, -20.3438, 9.98515, -13.1339, -19.5436, -19.6785, 0.618273, 114.812, -66.4859, -10, 101.148, Average Score: 3741.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.018, -50.4184, -4.6931, -0.03169, -47.9462, 85.586, 8.8765, 50.4633, -55.0856, -10, 79.2831, 29.1419, -22.659, -87.3994, -10.7835, -7.46188, 21.9189, -0.436357, 32.312, -62.1969, 2.07948, 120.993, 21.9863, -1.81882, -84.5497, -6.35901, -3.86068, 0.426158, 2.65277, -9.76402, -42.3406, -4.32918, 108.417, 12.8522, -29.7823, 56.2723, -2.18105, -22.0345, 74.5326, 3.41747, -13.659, 57.3512, 1.71901, 121.35, 3.27075, -28.995, 54.5166, -11.6478, -20.429, 110.011, -7.78847, 2.53708, 44.5181, 6.61743, 103.703, 33.5683, -17.8945, 11.7409, -9.98316, -18.8026, -14.8929, 4.72099, 121.037, -65.6048, -8.68449, 110.09, trial: 0, score: 4063 trial: 1, score: 4285 trial: 2, score: 1750 trial: 3, score: 3456 trial: 4, score: 4534 Policy 1: 31.018, -50.4184, -4.6931, -0.03169, -47.9462, 85.586, 8.8765, 50.4633, -55.0856, -10, 79.2831, 29.1419, -22.659, -87.3994, -10.7835, -7.46188, 21.9189, -0.436357, 32.312, -62.1969, 2.07948, 120.993, 21.9863, -1.81882, -84.5497, -6.35901, -3.86068, 0.426158, 2.65277, -9.76402, -42.3406, -4.32918, 108.417, 12.8522, -29.7823, 56.2723, -2.18105, -22.0345, 74.5326, 3.41747, -13.659, 57.3512, 1.71901, 121.35, 3.27075, -28.995, 54.5166, -11.6478, -20.429, 110.011, -7.78847, 2.53708, 44.5181, 6.61743, 103.703, 33.5683, -17.8945, 11.7409, -9.98316, -18.8026, -14.8929, 4.72099, 121.037, -65.6048, -8.68449, 110.09, Average Score: 3617.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.5004, -55.153, -4.66357, -2.83793, -49.002, 89.5951, 13.6581, 55.055, -51.5862, -6.19037, 86.1277, 24.6712, -23.9518, -83.8824, -3.96595, -13.7013, 13.7054, 0.293642, 38.9993, -58.5823, 1.82144, 116.622, 29.7866, -10.6108, -84.803, -8.49884, -11.8281, 3.6606, 5.17349, -6.11437, -33.5486, -10, 110.727, 15.9757, -26.4201, 54.3197, -10.4121, -21.3053, 78.1248, 2.38924, -13.4236, 60.1956, -0.952565, 117.318, 9.50466, -33.2281, 57.8946, -3.37543, -23.3017, 114.093, -2.7778, 0.666554, 44.7321, 11.3685, 102.767, 34.7021, -14.0745, 16.435, -10.6826, -23.0673, -20.5804, -0.361972, 113.768, -67.8801, -3.48565, 104.061, trial: 0, score: 4286 trial: 1, score: 3607 trial: 2, score: 2941 trial: 3, score: 3327 trial: 4, score: 3645 Policy 1: 30.5004, -55.153, -4.66357, -2.83793, -49.002, 89.5951, 13.6581, 55.055, -51.5862, -6.19037, 86.1277, 24.6712, -23.9518, -83.8824, -3.96595, -13.7013, 13.7054, 0.293642, 38.9993, -58.5823, 1.82144, 116.622, 29.7866, -10.6108, -84.803, -8.49884, -11.8281, 3.6606, 5.17349, -6.11437, -33.5486, -10, 110.727, 15.9757, -26.4201, 54.3197, -10.4121, -21.3053, 78.1248, 2.38924, -13.4236, 60.1956, -0.952565, 117.318, 9.50466, -33.2281, 57.8946, -3.37543, -23.3017, 114.093, -2.7778, 0.666554, 44.7321, 11.3685, 102.767, 34.7021, -14.0745, 16.435, -10.6826, -23.0673, -20.5804, -0.361972, 113.768, -67.8801, -3.48565, 104.061, Average Score: 3561.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.4051, -50.9827, -8.56097, -9.05149, -51.1587, 81.7569, 13.161, 51.2949, -55.9314, -8.45321, 83.0562, 25.9183, -23.4055, -87.6675, -8.03711, -6.04804, 18.63, 4.13851, 35.8662, -60.6554, 10.9032, 117.685, 24.8737, -4.30247, -85.4713, -6.883, -3.60445, -2.12652, -1.7224, -7.6874, -34.3267, -8.34117, 109.273, 17.2043, -28.3385, 56.6592, -5.44492, -21.6315, 71.0356, 5.22939, -15.0641, 60.945, 6.04139, 117.78, 1.67233, -30.1459, 54.3652, -2.48087, -20.821, 105.343, -5.16901, 2.91602, 44.4769, 11.0438, 103.705, 37.5645, -20.884, 17.2727, -9.36142, -19.2729, -18.2058, -0.213101, 122, -64.1423, -7.81287, 104.784, trial: 0, score: 1886 trial: 1, score: 4246 trial: 2, score: 1981 trial: 3, score: 3965 trial: 4, score: 3359 Policy 1: 22.4051, -50.9827, -8.56097, -9.05149, -51.1587, 81.7569, 13.161, 51.2949, -55.9314, -8.45321, 83.0562, 25.9183, -23.4055, -87.6675, -8.03711, -6.04804, 18.63, 4.13851, 35.8662, -60.6554, 10.9032, 117.685, 24.8737, -4.30247, -85.4713, -6.883, -3.60445, -2.12652, -1.7224, -7.6874, -34.3267, -8.34117, 109.273, 17.2043, -28.3385, 56.6592, -5.44492, -21.6315, 71.0356, 5.22939, -15.0641, 60.945, 6.04139, 117.78, 1.67233, -30.1459, 54.3652, -2.48087, -20.821, 105.343, -5.16901, 2.91602, 44.4769, 11.0438, 103.705, 37.5645, -20.884, 17.2727, -9.36142, -19.2729, -18.2058, -0.213101, 122, -64.1423, -7.81287, 104.784, Average Score: 3087.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.6196, -57.0229, -8.65279, -0.203108, -51.9823, 86.2306, 14.1591, 55.0882, -55.8691, -6.90673, 78.2962, 32.7271, -23.6307, -78.4463, -3.35735, -7.63766, 16.4657, 0.387567, 40.0088, -60.9169, 6.54055, 121.52, 23.3413, -10.2914, -79.7863, -10.2892, -10.9682, 3.84499, 2.50261, -11.4466, -40.8039, -4.12062, 104.321, 9.30778, -30.0665, 53.3069, -9.06917, -20.998, 78.1286, 2.91277, -16.6356, 59.2851, 5.00951, 122, 6.96478, -27.0837, 58.7098, -5.66891, -20.5127, 109.889, -2.51256, 6.31827, 52.726, 11.2477, 106.515, 41.6128, -22.0085, 7.47388, -5.48316, -23.0595, -14.6328, 5.21797, 117.891, -64.4874, -6.99187, 101.839, trial: 0, score: 3190 trial: 1, score: 3357 trial: 2, score: 2365 trial: 3, score: 3133 trial: 4, score: 4383 Policy 1: 27.6196, -57.0229, -8.65279, -0.203108, -51.9823, 86.2306, 14.1591, 55.0882, -55.8691, -6.90673, 78.2962, 32.7271, -23.6307, -78.4463, -3.35735, -7.63766, 16.4657, 0.387567, 40.0088, -60.9169, 6.54055, 121.52, 23.3413, -10.2914, -79.7863, -10.2892, -10.9682, 3.84499, 2.50261, -11.4466, -40.8039, -4.12062, 104.321, 9.30778, -30.0665, 53.3069, -9.06917, -20.998, 78.1286, 2.91277, -16.6356, 59.2851, 5.00951, 122, 6.96478, -27.0837, 58.7098, -5.66891, -20.5127, 109.889, -2.51256, 6.31827, 52.726, 11.2477, 106.515, 41.6128, -22.0085, 7.47388, -5.48316, -23.0595, -14.6328, 5.21797, 117.891, -64.4874, -6.99187, 101.839, Average Score: 3285.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.837, -55.9775, -1.40074, -6.95498, -53.4671, 82.9253, 9.62103, 55.7273, -49.2514, -10, 81.6639, 31.6767, -27.3342, -82.7156, -6.41099, -4.45982, 15.6113, 1.16674, 31.4916, -59.0269, 9.09222, 115.85, 20.2233, -11.1714, -83.5102, -4.96629, -3.34908, 5.14087, 2.64443, -7.84137, -36.3002, -3.21294, 107.915, 8.3774, -23.4704, 58.3267, -6.97842, -22.0454, 75.5189, 1.06148, -14.5711, 53.7091, 3.05313, 122, 2.54264, -29.9019, 56.0499, -11.5293, -22.8567, 111.454, -4.86538, 5.73989, 44.3407, 12.6088, 110.645, 37.1521, -18.0117, 11.1406, -7.27772, -26.1551, -19.1253, -0.939139, 115.579, -70.6987, -10, 107.063, trial: 0, score: 2614 trial: 1, score: 2206 trial: 2, score: 2109 trial: 3, score: 2558 trial: 4, score: 2205 Policy 1: 27.837, -55.9775, -1.40074, -6.95498, -53.4671, 82.9253, 9.62103, 55.7273, -49.2514, -10, 81.6639, 31.6767, -27.3342, -82.7156, -6.41099, -4.45982, 15.6113, 1.16674, 31.4916, -59.0269, 9.09222, 115.85, 20.2233, -11.1714, -83.5102, -4.96629, -3.34908, 5.14087, 2.64443, -7.84137, -36.3002, -3.21294, 107.915, 8.3774, -23.4704, 58.3267, -6.97842, -22.0454, 75.5189, 1.06148, -14.5711, 53.7091, 3.05313, 122, 2.54264, -29.9019, 56.0499, -11.5293, -22.8567, 111.454, -4.86538, 5.73989, 44.3407, 12.6088, 110.645, 37.1521, -18.0117, 11.1406, -7.27772, -26.1551, -19.1253, -0.939139, 115.579, -70.6987, -10, 107.063, Average Score: 2338.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.9691, -53.6446, -8.50139, -6.89157, -53.9706, 87.5416, 17.7488, 56.7778, -52.3242, -9.47149, 83.2665, 27.9397, -20.8402, -83.6931, -10.736, -9.87715, 13.8767, -1.90257, 38.8407, -55.6018, 9.23678, 113.495, 29.4925, -3.4241, -80.565, -12.3618, -4.93272, 0.0463958, 7.39547, -8.06694, -35.7376, -10, 106.22, 13.927, -29.3863, 55.8025, -4.70346, -23.806, 79.7221, -0.695269, -15.4029, 60.8044, -1.11324, 120.406, 1.29811, -32.7094, 52.4395, -5.5632, -21.7486, 105.641, -5.9963, 0.981026, 46.9173, 8.60965, 111.151, 33.6694, -16.7657, 8.73988, -14.5481, -27.0526, -16.3134, 4.29814, 120.604, -62.9725, -10, 105.557, trial: 0, score: 4758 trial: 1, score: 5501 trial: 2, score: 4221 trial: 3, score: 5054 trial: 4, score: 4919 Policy 1: 30.9691, -53.6446, -8.50139, -6.89157, -53.9706, 87.5416, 17.7488, 56.7778, -52.3242, -9.47149, 83.2665, 27.9397, -20.8402, -83.6931, -10.736, -9.87715, 13.8767, -1.90257, 38.8407, -55.6018, 9.23678, 113.495, 29.4925, -3.4241, -80.565, -12.3618, -4.93272, 0.0463958, 7.39547, -8.06694, -35.7376, -10, 106.22, 13.927, -29.3863, 55.8025, -4.70346, -23.806, 79.7221, -0.695269, -15.4029, 60.8044, -1.11324, 120.406, 1.29811, -32.7094, 52.4395, -5.5632, -21.7486, 105.641, -5.9963, 0.981026, 46.9173, 8.60965, 111.151, 33.6694, -16.7657, 8.73988, -14.5481, -27.0526, -16.3134, 4.29814, 120.604, -62.9725, -10, 105.557, Average Score: 4890.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 24.8795, -51.765, -9.1958, -3.16864, -54.4238, 81.7958, 17.956, 48.4316, -56.1368, -10, 85.6472, 28.0032, -18.9527, -82.4201, -3.24521, -6.89605, 18.974, 4.95172, 32.442, -60.5304, 9.38409, 116.906, 20.6847, -8.1314, -80.7438, -5.29296, -4.26772, 1.56832, 5.69594, -10.4067, -41.4226, -4.18982, 104.959, 11.1106, -27.1574, 55.675, -4.33684, -24.8608, 75.3878, -2.70103, -16.571, 55.71, 3.89396, 121.997, 2.92693, -29.9936, 50.5513, -8.36826, -14.1677, 110.718, -8.30408, 3.11573, 50.0085, 5.49326, 103.869, 34.8956, -12.9331, 9.72119, -13.7289, -22.4681, -15.3812, -0.0695268, 117.403, -71.6895, -6.14666, 110.492, trial: 0, score: 2654 trial: 1, score: 2366 trial: 2, score: 2527 trial: 3, score: 3453 trial: 4, score: 2557 Policy 1: 24.8795, -51.765, -9.1958, -3.16864, -54.4238, 81.7958, 17.956, 48.4316, -56.1368, -10, 85.6472, 28.0032, -18.9527, -82.4201, -3.24521, -6.89605, 18.974, 4.95172, 32.442, -60.5304, 9.38409, 116.906, 20.6847, -8.1314, -80.7438, -5.29296, -4.26772, 1.56832, 5.69594, -10.4067, -41.4226, -4.18982, 104.959, 11.1106, -27.1574, 55.675, -4.33684, -24.8608, 75.3878, -2.70103, -16.571, 55.71, 3.89396, 121.997, 2.92693, -29.9936, 50.5513, -8.36826, -14.1677, 110.718, -8.30408, 3.11573, 50.0085, 5.49326, 103.869, 34.8956, -12.9331, 9.72119, -13.7289, -22.4681, -15.3812, -0.0695268, 117.403, -71.6895, -6.14666, 110.492, Average Score: 2711.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 30.9163, -51.7882, -1.49125, -9.02415, -54.3404, 85.0174, 17.3556, 52.5239, -50.6808, -10, 82.6161, 29.2462, -26.3117, -85.4899, -2.98811, -8.15275, 13.8256, 1.64687, 35.501, -57.569, 2.44707, 120.489, 24.6175, -8.54087, -80.5813, -10.2859, -9.49953, 4.89003, 1.06506, -15.1647, -42.0434, -4.78248, 113.566, 17.1303, -29.3636, 54.2657, -5.73996, -17.9036, 74.1497, 0.110192, -9.41275, 57.9527, 5.06146, 121.084, 6.95136, -29.7934, 50.3556, -9.40599, -19.1294, 104.299, -0.293159, 3.47042, 52.6834, 6.05734, 109.097, 41.0229, -12.9933, 7.56184, -9.40542, -24.9339, -11.3394, 7.68657, 116.63, -67.858, -10, 108.629, trial: 0, score: 4222 trial: 1, score: 4374 trial: 2, score: 4733 trial: 3, score: 4095 trial: 4, score: 4317 Policy 1: 30.9163, -51.7882, -1.49125, -9.02415, -54.3404, 85.0174, 17.3556, 52.5239, -50.6808, -10, 82.6161, 29.2462, -26.3117, -85.4899, -2.98811, -8.15275, 13.8256, 1.64687, 35.501, -57.569, 2.44707, 120.489, 24.6175, -8.54087, -80.5813, -10.2859, -9.49953, 4.89003, 1.06506, -15.1647, -42.0434, -4.78248, 113.566, 17.1303, -29.3636, 54.2657, -5.73996, -17.9036, 74.1497, 0.110192, -9.41275, 57.9527, 5.06146, 121.084, 6.95136, -29.7934, 50.3556, -9.40599, -19.1294, 104.299, -0.293159, 3.47042, 52.6834, 6.05734, 109.097, 41.0229, -12.9933, 7.56184, -9.40542, -24.9339, -11.3394, 7.68657, 116.63, -67.858, -10, 108.629, Average Score: 4348.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.7327, -54.0447, -2.36218, -0.707932, -46.7215, 85.2631, 14.8367, 51.3962, -55.7868, -10, 80.1722, 30.7177, -17.362, -81.9529, -10.6731, -4.47697, 22.6698, -0.459275, 38.1325, -63.619, 6.21007, 112.645, 27.3464, -2.56725, -87.5865, -12.7047, -7.81943, -2.84665, -2.01368, -8.29019, -35.7661, -6.26128, 106.183, 13.738, -26.7458, 51.2341, -10.5799, -18.9486, 75.7117, 0.486959, -12.3557, 56.3353, 1.2454, 118.285, 5.36061, -30.5549, 55.1855, -8.95795, -15.0891, 105.425, -4.49151, -0.916623, 44.1865, 5.56053, 105.635, 41.703, -13.101, 9.74612, -9.29039, -24.8358, -12.0793, 0.67891, 122, -70.5927, -7.67895, 101.196, trial: 0, score: 3734 trial: 1, score: 2205 trial: 2, score: 2046 trial: 3, score: 3101 trial: 4, score: 1949 Policy 1: 31.7327, -54.0447, -2.36218, -0.707932, -46.7215, 85.2631, 14.8367, 51.3962, -55.7868, -10, 80.1722, 30.7177, -17.362, -81.9529, -10.6731, -4.47697, 22.6698, -0.459275, 38.1325, -63.619, 6.21007, 112.645, 27.3464, -2.56725, -87.5865, -12.7047, -7.81943, -2.84665, -2.01368, -8.29019, -35.7661, -6.26128, 106.183, 13.738, -26.7458, 51.2341, -10.5799, -18.9486, 75.7117, 0.486959, -12.3557, 56.3353, 1.2454, 118.285, 5.36061, -30.5549, 55.1855, -8.95795, -15.0891, 105.425, -4.49151, -0.916623, 44.1865, 5.56053, 105.635, 41.703, -13.101, 9.74612, -9.29039, -24.8358, -12.0793, 0.67891, 122, -70.5927, -7.67895, 101.196, Average Score: 2607 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.4637, -49.7605, -5.12607, -0.927092, -46.825, 88.0704, 10.2512, 54.1927, -46.585, -10, 81.1258, 31.0248, -23.0019, -84.3855, -4.18051, -12.0468, 16.5158, 0.372198, 41.2201, -63.6005, 4.64996, 117.701, 25.6725, -10.9056, -87.2338, -4.94396, -4.04826, 5.93353, -2.10803, -6.53473, -37.495, -4.78423, 106.111, 16.0903, -22.5143, 59.0877, -9.22784, -18.1206, 78.8184, 6.39605, -16.599, 54.0913, 6.34465, 122, 3.31899, -27.2236, 58.1089, -8.21684, -20.4809, 108.511, -3.28782, -2.3143, 45.7778, 7.93184, 102.111, 36.2966, -19.3156, 13.1348, -6.3916, -20.5184, -14.8109, 2.36777, 122, -63.9974, -10, 103.027, trial: 0, score: 4214 trial: 1, score: 2935 trial: 2, score: 3773 trial: 3, score: 3933 trial: 4, score: 4669 Policy 1: 22.4637, -49.7605, -5.12607, -0.927092, -46.825, 88.0704, 10.2512, 54.1927, -46.585, -10, 81.1258, 31.0248, -23.0019, -84.3855, -4.18051, -12.0468, 16.5158, 0.372198, 41.2201, -63.6005, 4.64996, 117.701, 25.6725, -10.9056, -87.2338, -4.94396, -4.04826, 5.93353, -2.10803, -6.53473, -37.495, -4.78423, 106.111, 16.0903, -22.5143, 59.0877, -9.22784, -18.1206, 78.8184, 6.39605, -16.599, 54.0913, 6.34465, 122, 3.31899, -27.2236, 58.1089, -8.21684, -20.4809, 108.511, -3.28782, -2.3143, 45.7778, 7.93184, 102.111, 36.2966, -19.3156, 13.1348, -6.3916, -20.5184, -14.8109, 2.36777, 122, -63.9974, -10, 103.027, Average Score: 3904.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.2982, -52.6441, 0.211415, -8.29385, -50.6893, 90.6969, 10.8738, 48.0983, -53.2935, -8.87368, 76.9272, 28.9394, -20.2526, -82.9939, -9.89387, -5.38569, 15.5281, 5.50461, 35.6082, -56.4704, 10.2583, 118.445, 22.9524, -4.59897, -79.9476, -8.8961, -10.474, 4.14887, 5.04336, -15.2199, -33.805, -2.77571, 107.774, 14.8649, -23.5325, 55.071, -5.45927, -21.398, 75.6741, -0.574143, -10.9524, 62.7511, 0.87029, 122, 8.87129, -32.8667, 56.5208, -8.26841, -21.0455, 106.055, -3.69182, 0.508848, 46.8054, 7.46815, 109.44, 32.3901, -15.4837, 15.4184, -6.62183, -21.922, -19.0032, -0.334895, 116.363, -68.5913, -3.71065, 110.832, trial: 0, score: 4317 trial: 1, score: 4029 trial: 2, score: 4541 trial: 3, score: 3997 trial: 4, score: 4830 Policy 1: 23.2982, -52.6441, 0.211415, -8.29385, -50.6893, 90.6969, 10.8738, 48.0983, -53.2935, -8.87368, 76.9272, 28.9394, -20.2526, -82.9939, -9.89387, -5.38569, 15.5281, 5.50461, 35.6082, -56.4704, 10.2583, 118.445, 22.9524, -4.59897, -79.9476, -8.8961, -10.474, 4.14887, 5.04336, -15.2199, -33.805, -2.77571, 107.774, 14.8649, -23.5325, 55.071, -5.45927, -21.398, 75.6741, -0.574143, -10.9524, 62.7511, 0.87029, 122, 8.87129, -32.8667, 56.5208, -8.26841, -21.0455, 106.055, -3.69182, 0.508848, 46.8054, 7.46815, 109.44, 32.3901, -15.4837, 15.4184, -6.62183, -21.922, -19.0032, -0.334895, 116.363, -68.5913, -3.71065, 110.832, Average Score: 4342.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.4377, -48.546, -3.44979, -6.55641, -49.0568, 90.1496, 18.06, 51.5143, -54.4209, -7.0489, 83.4666, 33.09, -25.0702, -84.4554, -3.2289, -11.5337, 14.9834, 1.38858, 36.554, -62.0007, 3.67954, 112.419, 20.7591, -6.64549, -88, -12.5281, -10.0105, 0.844308, 2.22427, -13.9653, -36.3232, -10, 111.037, 14.4821, -23.2655, 56.7103, -7.05982, -17.2793, 76.6622, 5.70575, -13.6256, 53.8705, 3.25447, 119.303, 3.96682, -30.502, 55.5904, -10.5006, -15.5391, 112.979, -3.77574, -3.07204, 47.4875, 3.22585, 104.991, 33.5301, -18.3366, 15.7738, -5.3008, -24.1124, -11.5537, 0.28358, 119.549, -68.226, -10, 104.192, trial: 0, score: 3839 trial: 1, score: 4349 trial: 2, score: 4605 trial: 3, score: 5151 trial: 4, score: 6077 Policy 1: 31.4377, -48.546, -3.44979, -6.55641, -49.0568, 90.1496, 18.06, 51.5143, -54.4209, -7.0489, 83.4666, 33.09, -25.0702, -84.4554, -3.2289, -11.5337, 14.9834, 1.38858, 36.554, -62.0007, 3.67954, 112.419, 20.7591, -6.64549, -88, -12.5281, -10.0105, 0.844308, 2.22427, -13.9653, -36.3232, -10, 111.037, 14.4821, -23.2655, 56.7103, -7.05982, -17.2793, 76.6622, 5.70575, -13.6256, 53.8705, 3.25447, 119.303, 3.96682, -30.502, 55.5904, -10.5006, -15.5391, 112.979, -3.77574, -3.07204, 47.4875, 3.22585, 104.991, 33.5301, -18.3366, 15.7738, -5.3008, -24.1124, -11.5537, 0.28358, 119.549, -68.226, -10, 104.192, Average Score: 4804.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.6525, -50.5211, -3.67704, -0.910386, -55, 86.5495, 16.1972, 55.7, -49.392, -9.8785, 85.4956, 32.2553, -27.0347, -82.8489, -5.47401, -10.8533, 16.5045, 6.69211, 40.9117, -60.0815, 5.05964, 113.047, 24.6509, -9.69233, -82.0029, -10.2706, -7.14992, 2.20397, -1.45641, -13.8484, -42.6768, -7.91713, 110.459, 10.4818, -28.6853, 55.9521, -7.63801, -16.2579, 70.8372, 6.79049, -16.5079, 62.2682, 2.86501, 122, 5.23288, -34.7594, 57.6548, -6.41512, -14.4375, 109.927, -7.94382, 2.55863, 45.2814, 9.34583, 107.39, 38.3879, -17.3519, 12.1936, -6.89692, -21.4501, -14.9709, 3.6043, 121.451, -67.3155, -10, 107.846, trial: 0, score: 4573 trial: 1, score: 2559 trial: 2, score: 2493 trial: 3, score: 4029 trial: 4, score: 1949 Policy 1: 29.6525, -50.5211, -3.67704, -0.910386, -55, 86.5495, 16.1972, 55.7, -49.392, -9.8785, 85.4956, 32.2553, -27.0347, -82.8489, -5.47401, -10.8533, 16.5045, 6.69211, 40.9117, -60.0815, 5.05964, 113.047, 24.6509, -9.69233, -82.0029, -10.2706, -7.14992, 2.20397, -1.45641, -13.8484, -42.6768, -7.91713, 110.459, 10.4818, -28.6853, 55.9521, -7.63801, -16.2579, 70.8372, 6.79049, -16.5079, 62.2682, 2.86501, 122, 5.23288, -34.7594, 57.6548, -6.41512, -14.4375, 109.927, -7.94382, 2.55863, 45.2814, 9.34583, 107.39, 38.3879, -17.3519, 12.1936, -6.89692, -21.4501, -14.9709, 3.6043, 121.451, -67.3155, -10, 107.846, Average Score: 3120.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.106, -53.6624, -5.31469, -5.34059, -53.8785, 84.5593, 16.7834, 49.6744, -56.1444, -6.39441, 80.8774, 24.1702, -27.0921, -79.9997, -7.18747, -4.5979, 13.7297, -2.27364, 37.5364, -62.2901, 8.22246, 115.985, 24.7063, -7.89128, -82.0617, -10.8873, -8.78158, -1.68045, -0.75678, -12.7186, -39.5972, -8.98692, 107.299, 12.0561, -22.6878, 51.8017, -7.41041, -20.3732, 76.3783, 4.44925, -12.9142, 54.5546, 2.61986, 122, 2.66443, -33.9838, 50.0543, -2.54572, -19.7206, 106.526, -7.29303, -0.00451482, 46.4273, 12.8819, 103.431, 41.2094, -13.7149, 8.86972, -14.1344, -22.1209, -15.4177, -0.270224, 114.379, -67.984, -10, 110.737, trial: 0, score: 1949 trial: 1, score: 2333 trial: 2, score: 3869 trial: 3, score: 2366 trial: 4, score: 3903 Policy 1: 31.106, -53.6624, -5.31469, -5.34059, -53.8785, 84.5593, 16.7834, 49.6744, -56.1444, -6.39441, 80.8774, 24.1702, -27.0921, -79.9997, -7.18747, -4.5979, 13.7297, -2.27364, 37.5364, -62.2901, 8.22246, 115.985, 24.7063, -7.89128, -82.0617, -10.8873, -8.78158, -1.68045, -0.75678, -12.7186, -39.5972, -8.98692, 107.299, 12.0561, -22.6878, 51.8017, -7.41041, -20.3732, 76.3783, 4.44925, -12.9142, 54.5546, 2.61986, 122, 2.66443, -33.9838, 50.0543, -2.54572, -19.7206, 106.526, -7.29303, -0.00451482, 46.4273, 12.8819, 103.431, 41.2094, -13.7149, 8.86972, -14.1344, -22.1209, -15.4177, -0.270224, 114.379, -67.984, -10, 110.737, Average Score: 2884 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.9863, -56.0055, -8.82713, -1.5748, -49.6461, 89.2536, 12.2872, 54.6534, -46.7202, -5.22297, 85.9646, 33.2998, -18.2794, -84.4579, -11.0783, -12.6136, 16.2158, -1.33151, 31.9891, -63.3899, 3.44907, 121.737, 25.7931, -2.1688, -88, -6.55451, -7.85739, 0.11525, 6.42114, -13.4611, -33.5836, -8.05003, 109.452, 8.53949, -21.0791, 58.5118, -3.27111, -19.0212, 73.0918, 4.13126, -10.2935, 62.3589, 5.48828, 122, 3.66175, -28.2255, 52.8218, -11.2972, -17.5038, 111.144, -4.43053, -3.12167, 48.4814, 4.76928, 104.685, 38.483, -19.0397, 13.001, -9.97791, -20.7223, -20.1961, 1.6443, 118.048, -65.6196, -5.7319, 102.418, trial: 0, score: 5565 trial: 1, score: 4637 trial: 2, score: 3485 trial: 3, score: 3613 trial: 4, score: 4061 Policy 1: 25.9863, -56.0055, -8.82713, -1.5748, -49.6461, 89.2536, 12.2872, 54.6534, -46.7202, -5.22297, 85.9646, 33.2998, -18.2794, -84.4579, -11.0783, -12.6136, 16.2158, -1.33151, 31.9891, -63.3899, 3.44907, 121.737, 25.7931, -2.1688, -88, -6.55451, -7.85739, 0.11525, 6.42114, -13.4611, -33.5836, -8.05003, 109.452, 8.53949, -21.0791, 58.5118, -3.27111, -19.0212, 73.0918, 4.13126, -10.2935, 62.3589, 5.48828, 122, 3.66175, -28.2255, 52.8218, -11.2972, -17.5038, 111.144, -4.43053, -3.12167, 48.4814, 4.76928, 104.685, 38.483, -19.0397, 13.001, -9.97791, -20.7223, -20.1961, 1.6443, 118.048, -65.6196, -5.7319, 102.418, Average Score: 4272.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.9296, -53.1936, -9.09309, -2.35811, -47.2511, 81.8505, 12.715, 52.4171, -51.5767, -10, 84.6617, 30.3853, -24.6524, -85.6216, -7.91797, -10.4424, 23.0852, 7.06331, 40.466, -57.5109, 6.34263, 120.421, 24.0597, -6.4049, -83.454, -8.73171, -5.26937, 2.72537, 1.79902, -15.2493, -43.0361, -7.76764, 110.289, 15.1019, -22.6724, 53.1148, -6.51008, -21.1267, 76.9357, 0.643032, -11.3719, 63.4628, 4.8944, 122, 2.99331, -28.896, 57.286, -7.95978, -22.7391, 109.288, -2.63259, -2.86461, 52.7672, 9.83796, 108.359, 32.1308, -15.6519, 9.24184, -14.7028, -19.5844, -16.1872, -1.08264, 113.532, -69.7555, -10, 109.77, trial: 0, score: 4350 trial: 1, score: 4509 trial: 2, score: 4765 trial: 3, score: 4191 trial: 4, score: 4573 Policy 1: 29.9296, -53.1936, -9.09309, -2.35811, -47.2511, 81.8505, 12.715, 52.4171, -51.5767, -10, 84.6617, 30.3853, -24.6524, -85.6216, -7.91797, -10.4424, 23.0852, 7.06331, 40.466, -57.5109, 6.34263, 120.421, 24.0597, -6.4049, -83.454, -8.73171, -5.26937, 2.72537, 1.79902, -15.2493, -43.0361, -7.76764, 110.289, 15.1019, -22.6724, 53.1148, -6.51008, -21.1267, 76.9357, 0.643032, -11.3719, 63.4628, 4.8944, 122, 2.99331, -28.896, 57.286, -7.95978, -22.7391, 109.288, -2.63259, -2.86461, 52.7672, 9.83796, 108.359, 32.1308, -15.6519, 9.24184, -14.7028, -19.5844, -16.1872, -1.08264, 113.532, -69.7555, -10, 109.77, Average Score: 4477.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.8071, -50.0002, -5.12414, -9.85062, -55, 87.5559, 9.92719, 56.0137, -46.7601, -8.95818, 80.8456, 24.7827, -24.1709, -87.5554, -10.6643, -13.1435, 17.915, 2.95401, 37.0623, -64.1839, 10.6203, 114.767, 24.7705, -1.59297, -88, -9.05646, -4.01529, -2.81499, 5.33514, -8.05363, -36.1156, -10, 107.284, 15.0033, -30.0096, 60.4542, -10.1439, -24.8113, 77.2892, 1.10687, -17.4004, 54.7365, 0.725379, 122, 5.57, -26.2893, 52.0955, -10.7977, -16.2951, 110.334, -4.49594, 3.15008, 44.8826, 5.39228, 101.403, 37.56, -16.5331, 13.4839, -9.22007, -20.1053, -14.7141, 0.549369, 117.571, -65.2839, -4.42125, 108.959, trial: 0, score: 3485 trial: 1, score: 3197 trial: 2, score: 4253 trial: 3, score: 4349 trial: 4, score: 4349 Policy 1: 27.8071, -50.0002, -5.12414, -9.85062, -55, 87.5559, 9.92719, 56.0137, -46.7601, -8.95818, 80.8456, 24.7827, -24.1709, -87.5554, -10.6643, -13.1435, 17.915, 2.95401, 37.0623, -64.1839, 10.6203, 114.767, 24.7705, -1.59297, -88, -9.05646, -4.01529, -2.81499, 5.33514, -8.05363, -36.1156, -10, 107.284, 15.0033, -30.0096, 60.4542, -10.1439, -24.8113, 77.2892, 1.10687, -17.4004, 54.7365, 0.725379, 122, 5.57, -26.2893, 52.0955, -10.7977, -16.2951, 110.334, -4.49594, 3.15008, 44.8826, 5.39228, 101.403, 37.56, -16.5331, 13.4839, -9.22007, -20.1053, -14.7141, 0.549369, 117.571, -65.2839, -4.42125, 108.959, Average Score: 3926.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.62, -56.5708, -4.11573, -4.00967, -52.0943, 82.5525, 9.60967, 49.7176, -48.9425, -10, 83.2159, 25.0756, -17.3703, -83.9556, -6.25772, -12.0171, 13.8875, 1.89568, 35.1578, -55.1848, 9.96703, 116.279, 27.156, -3.64157, -81.7912, -10.3512, -9.2178, 6.06476, 0.655746, -8.82209, -40.2681, -7.17569, 105.905, 17.3385, -22.7374, 53.0413, -8.41926, -22.8502, 76.8736, 2.99598, -13.5408, 56.5145, 3.72522, 122, 2.5887, -31.8985, 51.4588, -9.77093, -21.8826, 111.797, -3.37821, 1.71927, 52.49, 8.67867, 103.414, 34.5316, -14.9503, 14.7156, -5.21675, -23.66, -12.2457, 3.1216, 118.195, -63.4398, -6.2566, 106.595, trial: 0, score: 4157 trial: 1, score: 3549 trial: 2, score: 4317 trial: 3, score: 3134 trial: 4, score: 4055 Policy 1: 26.62, -56.5708, -4.11573, -4.00967, -52.0943, 82.5525, 9.60967, 49.7176, -48.9425, -10, 83.2159, 25.0756, -17.3703, -83.9556, -6.25772, -12.0171, 13.8875, 1.89568, 35.1578, -55.1848, 9.96703, 116.279, 27.156, -3.64157, -81.7912, -10.3512, -9.2178, 6.06476, 0.655746, -8.82209, -40.2681, -7.17569, 105.905, 17.3385, -22.7374, 53.0413, -8.41926, -22.8502, 76.8736, 2.99598, -13.5408, 56.5145, 3.72522, 122, 2.5887, -31.8985, 51.4588, -9.77093, -21.8826, 111.797, -3.37821, 1.71927, 52.49, 8.67867, 103.414, 34.5316, -14.9503, 14.7156, -5.21675, -23.66, -12.2457, 3.1216, 118.195, -63.4398, -6.2566, 106.595, Average Score: 3842.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.5372, -52.3618, -8.17296, -7.72114, -54.8077, 85.3882, 14.6182, 54.666, -55.062, -10, 76.6778, 26.5507, -23.0855, -78.9628, -7.41208, -7.02215, 13.5647, 6.81145, 35.9663, -62.4363, 4.49364, 113.611, 29.1072, -4.35548, -86.6745, -10.0179, -11.6031, 3.90817, 3.47367, -10.2647, -37.4885, -4.30607, 104.375, 10.3321, -24.2583, 54.0773, -7.95228, -24.1494, 74.4112, 0.812544, -9.8733, 54.685, 3.53303, 122, 2.68213, -35.2932, 57.4287, -4.86901, -21.0372, 106.989, -1.61211, 5.94751, 48.2875, 11.9251, 109.627, 41.1401, -16.0657, 10.9971, -6.71484, -25.8958, -19.9964, 1.64079, 118.961, -65.426, -3.62393, 106.382, trial: 0, score: 4061 trial: 1, score: 3005 trial: 2, score: 3452 trial: 3, score: 1565 trial: 4, score: 1853 Policy 1: 22.5372, -52.3618, -8.17296, -7.72114, -54.8077, 85.3882, 14.6182, 54.666, -55.062, -10, 76.6778, 26.5507, -23.0855, -78.9628, -7.41208, -7.02215, 13.5647, 6.81145, 35.9663, -62.4363, 4.49364, 113.611, 29.1072, -4.35548, -86.6745, -10.0179, -11.6031, 3.90817, 3.47367, -10.2647, -37.4885, -4.30607, 104.375, 10.3321, -24.2583, 54.0773, -7.95228, -24.1494, 74.4112, 0.812544, -9.8733, 54.685, 3.53303, 122, 2.68213, -35.2932, 57.4287, -4.86901, -21.0372, 106.989, -1.61211, 5.94751, 48.2875, 11.9251, 109.627, 41.1401, -16.0657, 10.9971, -6.71484, -25.8958, -19.9964, 1.64079, 118.961, -65.426, -3.62393, 106.382, Average Score: 2787.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.0379, -51.0743, -1.74682, -6.24827, -55, 89.8887, 16.2113, 47.0859, -51.4546, -10, 79.6732, 31.2905, -25.6966, -82.506, -8.19294, -11.7323, 14.9368, -1.27308, 34.6444, -61.4232, 3.68444, 114.211, 27.1145, -4.30079, -86.0739, -9.53324, -4.06848, -2.93154, 5.74579, -10.7841, -40.473, -2.9743, 111.266, 15.2722, -24.3119, 52.5973, -11.4337, -19.3316, 79.3443, 2.23302, -13.0149, 61.403, 4.86765, 118.148, 6.1317, -31.0282, 55.5721, -10.1299, -15.0357, 106.463, -0.202624, 2.55715, 42.8628, 12.7988, 106.507, 39.4584, -13.1056, 12.795, -14.83, -23.6124, -17.6178, 0.116914, 114.684, -63.0426, -9.33163, 109.119, trial: 0, score: 3646 trial: 1, score: 3709 trial: 2, score: 4509 trial: 3, score: 4829 trial: 4, score: 4221 Policy 1: 25.0379, -51.0743, -1.74682, -6.24827, -55, 89.8887, 16.2113, 47.0859, -51.4546, -10, 79.6732, 31.2905, -25.6966, -82.506, -8.19294, -11.7323, 14.9368, -1.27308, 34.6444, -61.4232, 3.68444, 114.211, 27.1145, -4.30079, -86.0739, -9.53324, -4.06848, -2.93154, 5.74579, -10.7841, -40.473, -2.9743, 111.266, 15.2722, -24.3119, 52.5973, -11.4337, -19.3316, 79.3443, 2.23302, -13.0149, 61.403, 4.86765, 118.148, 6.1317, -31.0282, 55.5721, -10.1299, -15.0357, 106.463, -0.202624, 2.55715, 42.8628, 12.7988, 106.507, 39.4584, -13.1056, 12.795, -14.83, -23.6124, -17.6178, 0.116914, 114.684, -63.0426, -9.33163, 109.119, Average Score: 4182.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 28.1649, -51.1773, -6.69799, -4.64673, -52.1914, 82.9631, 14.7542, 54.2353, -54.6287, -7.91848, 85.5527, 31.6751, -17.7313, -83.1488, -10.5675, -9.73236, 14.7866, 6.61796, 36.6987, -58.3394, 3.53032, 117.07, 25.0873, -5.66354, -85.8703, -7.19754, -6.34285, 5.88646, 0.812463, -6.20634, -33.7871, -4.19004, 107.043, 9.13022, -27.2978, 59.4912, -8.59741, -17.2608, 74.6179, 5.23189, -9.45401, 60.6913, 4.12891, 122, 8.29073, -26.7346, 57.9577, -3.39085, -20.5947, 108.551, -4.23911, 2.75837, 50.1835, 12.3401, 106.391, 38.0996, -12.9494, 13.2209, -10.0698, -22.8755, -19.6816, -1.23267, 120.86, -68.498, -7.89433, 107.472, trial: 0, score: 1599 trial: 1, score: 1373 trial: 2, score: 1526 trial: 3, score: 1661 trial: 4, score: 1277 Policy 1: 28.1649, -51.1773, -6.69799, -4.64673, -52.1914, 82.9631, 14.7542, 54.2353, -54.6287, -7.91848, 85.5527, 31.6751, -17.7313, -83.1488, -10.5675, -9.73236, 14.7866, 6.61796, 36.6987, -58.3394, 3.53032, 117.07, 25.0873, -5.66354, -85.8703, -7.19754, -6.34285, 5.88646, 0.812463, -6.20634, -33.7871, -4.19004, 107.043, 9.13022, -27.2978, 59.4912, -8.59741, -17.2608, 74.6179, 5.23189, -9.45401, 60.6913, 4.12891, 122, 8.29073, -26.7346, 57.9577, -3.39085, -20.5947, 108.551, -4.23911, 2.75837, 50.1835, 12.3401, 106.391, 38.0996, -12.9494, 13.2209, -10.0698, -22.8755, -19.6816, -1.23267, 120.86, -68.498, -7.89433, 107.472, Average Score: 1487.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 25.5058, -56.9412, -9.27253, -8.77058, -47.8927, 83.7687, 11.8784, 47.0963, -46.5896, -10, 77.0207, 29.6031, -27.2136, -85.6262, -1.64607, -13.7402, 17.4488, -1.24773, 31.3806, -60.8764, 5.02103, 119.605, 24.3444, -3.16704, -85.9073, -6.40663, -8.58004, 2.14657, -0.159317, -8.63659, -40.2503, -7.4894, 113.021, 14.1764, -28.2466, 56.3065, -5.41391, -15.2228, 72.1194, 5.65028, -9.83447, 57.2412, 4.65089, 122, 3.2289, -32.005, 53.085, -10.0406, -15.8597, 107.78, -1.41658, 5.79614, 44.7679, 9.45765, 110.819, 37.6284, -15.6618, 15.7978, -10.3255, -23.0595, -11.5837, 4.30613, 121.399, -67.919, -10, 108.97, trial: 0, score: 2877 trial: 1, score: 2207 trial: 2, score: 2367 trial: 3, score: 3356 trial: 4, score: 3485 Policy 1: 25.5058, -56.9412, -9.27253, -8.77058, -47.8927, 83.7687, 11.8784, 47.0963, -46.5896, -10, 77.0207, 29.6031, -27.2136, -85.6262, -1.64607, -13.7402, 17.4488, -1.24773, 31.3806, -60.8764, 5.02103, 119.605, 24.3444, -3.16704, -85.9073, -6.40663, -8.58004, 2.14657, -0.159317, -8.63659, -40.2503, -7.4894, 113.021, 14.1764, -28.2466, 56.3065, -5.41391, -15.2228, 72.1194, 5.65028, -9.83447, 57.2412, 4.65089, 122, 3.2289, -32.005, 53.085, -10.0406, -15.8597, 107.78, -1.41658, 5.79614, 44.7679, 9.45765, 110.819, 37.6284, -15.6618, 15.7978, -10.3255, -23.0595, -11.5837, 4.30613, 121.399, -67.919, -10, 108.97, Average Score: 2858.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.4014, -51.0008, -3.12344, -9.09666, -50.1203, 86.2151, 16.4557, 48.494, -48.4556, -10, 76.9276, 27.4856, -18.3444, -83.7888, -11.2217, -7.2643, 15.56, 6.4279, 36.2088, -64.2991, 11.0092, 120.042, 26.2929, -8.70622, -80.261, -8.58698, -9.53472, 4.8114, 4.42139, -13.7455, -38.7278, -7.5212, 108.311, 16.0179, -26.9594, 54.5125, -11.2626, -18.8045, 77.9404, 6.78878, -18.2785, 56.8198, 0.86685, 122, 2.60127, -33.1879, 55.8451, -4.24628, -16.2692, 108.873, -2.68323, 4.96217, 51.2736, 7.50637, 106.461, 36.0813, -14.6923, 13.6406, -11.5985, -26.2318, -17.5982, -1.12845, 116.441, -64.525, -10, 107.703, trial: 0, score: 3581 trial: 1, score: 3679 trial: 2, score: 3903 trial: 3, score: 4221 trial: 4, score: 4669 Policy 1: 29.4014, -51.0008, -3.12344, -9.09666, -50.1203, 86.2151, 16.4557, 48.494, -48.4556, -10, 76.9276, 27.4856, -18.3444, -83.7888, -11.2217, -7.2643, 15.56, 6.4279, 36.2088, -64.2991, 11.0092, 120.042, 26.2929, -8.70622, -80.261, -8.58698, -9.53472, 4.8114, 4.42139, -13.7455, -38.7278, -7.5212, 108.311, 16.0179, -26.9594, 54.5125, -11.2626, -18.8045, 77.9404, 6.78878, -18.2785, 56.8198, 0.86685, 122, 2.60127, -33.1879, 55.8451, -4.24628, -16.2692, 108.873, -2.68323, 4.96217, 51.2736, 7.50637, 106.461, 36.0813, -14.6923, 13.6406, -11.5985, -26.2318, -17.5982, -1.12845, 116.441, -64.525, -10, 107.703, Average Score: 4010.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.1911, -51.41, -0.243959, -1.17325, -49.9422, 88.0134, 9.63722, 50.8437, -52.2588, -10, 78.7586, 32.5753, -27.2662, -85.111, -7.20601, -11.262, 16.0771, 1.31346, 31.8932, -64.3081, 10.5871, 114.191, 27.0598, -5.50682, -85.9937, -13.6474, -6.3974, -1.86425, 3.04391, -13.7773, -34.1863, -2.28603, 112.11, 15.2544, -26.2158, 55.1172, -3.17256, -17.0317, 70.2975, -2.00464, -8.98619, 61.5609, 6.68283, 122, 9.14851, -33.4073, 57.4202, -6.62613, -24.0953, 109.091, -4.87352, 4.34951, 42.9604, 3.50867, 106.028, 39.4654, -21.3133, 13.0589, -9.01947, -27.0985, -11.2918, 6.80397, 122, -63.2025, -6.00653, 102.702, trial: 0, score: 3295 trial: 1, score: 1598 trial: 2, score: 1533 trial: 3, score: 1654 trial: 4, score: 3901 Policy 1: 27.1911, -51.41, -0.243959, -1.17325, -49.9422, 88.0134, 9.63722, 50.8437, -52.2588, -10, 78.7586, 32.5753, -27.2662, -85.111, -7.20601, -11.262, 16.0771, 1.31346, 31.8932, -64.3081, 10.5871, 114.191, 27.0598, -5.50682, -85.9937, -13.6474, -6.3974, -1.86425, 3.04391, -13.7773, -34.1863, -2.28603, 112.11, 15.2544, -26.2158, 55.1172, -3.17256, -17.0317, 70.2975, -2.00464, -8.98619, 61.5609, 6.68283, 122, 9.14851, -33.4073, 57.4202, -6.62613, -24.0953, 109.091, -4.87352, 4.34951, 42.9604, 3.50867, 106.028, 39.4654, -21.3133, 13.0589, -9.01947, -27.0985, -11.2918, 6.80397, 122, -63.2025, -6.00653, 102.702, Average Score: 2396.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.6009, -49.8529, -1.64149, -5.72258, -51.3713, 85.0859, 13.0781, 51.5454, -55.1847, -6.09068, 80.3147, 29.6479, -26.0875, -85.5325, -8.55396, -8.55749, 21.3368, 3.1112, 39.1777, -56.4068, 7.13369, 121.815, 20.8158, -6.52729, -79.5285, -4.60437, -12.7406, 0.149306, 6.90304, -11.1341, -36.6812, -4.68137, 111.754, 17.5693, -20.9861, 56.1549, -8.92433, -23.2757, 77.4911, -0.218982, -8.53163, 61.7321, 0.633681, 121.866, 1.04044, -33.5738, 55.3077, -4.00024, -17.049, 108.42, -3.74107, 6.15581, 45.4977, 12.1037, 107.126, 33.4744, -13.3073, 9.11744, -6.98921, -25.0382, -13.2721, 1.85141, 114.464, -65.0006, -7.95573, 102.795, trial: 0, score: 2013 trial: 1, score: 3350 trial: 2, score: 3583 trial: 3, score: 3357 trial: 4, score: 2550 Policy 1: 29.6009, -49.8529, -1.64149, -5.72258, -51.3713, 85.0859, 13.0781, 51.5454, -55.1847, -6.09068, 80.3147, 29.6479, -26.0875, -85.5325, -8.55396, -8.55749, 21.3368, 3.1112, 39.1777, -56.4068, 7.13369, 121.815, 20.8158, -6.52729, -79.5285, -4.60437, -12.7406, 0.149306, 6.90304, -11.1341, -36.6812, -4.68137, 111.754, 17.5693, -20.9861, 56.1549, -8.92433, -23.2757, 77.4911, -0.218982, -8.53163, 61.7321, 0.633681, 121.866, 1.04044, -33.5738, 55.3077, -4.00024, -17.049, 108.42, -3.74107, 6.15581, 45.4977, 12.1037, 107.126, 33.4744, -13.3073, 9.11744, -6.98921, -25.0382, -13.2721, 1.85141, 114.464, -65.0006, -7.95573, 102.795, Average Score: 2970.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 28.0893, -51.8145, -8.24912, -3.33362, -51.203, 88.9361, 15.1596, 51.0561, -51.8174, -5.12642, 85.9325, 24.7199, -20.7402, -79.9651, -3.0576, -12.7093, 21.365, 1.84286, 32.2113, -64.2544, 3.75881, 118.242, 25.2608, -7.14816, -80.0111, -4.98215, -5.62966, 5.87671, 1.75736, -14.1307, -37.3356, -4.16128, 104.396, 17.7174, -25.0244, 60.712, -3.09113, -24.2773, 78.9588, 4.79206, -11.2394, 58.2333, 3.51056, 119.132, 0.756165, -32.4727, 55.6801, -2.76109, -15.5004, 109.952, -8.23926, 0.667495, 52.2196, 11.0107, 101.913, 41.1371, -21.7641, 9.19744, -8.64182, -23.7153, -19.8293, 2.72503, 114.698, -69.3622, -9.50923, 106.43, trial: 0, score: 1630 trial: 1, score: 1342 trial: 2, score: 1309 trial: 3, score: 1437 trial: 4, score: 1462 Policy 1: 28.0893, -51.8145, -8.24912, -3.33362, -51.203, 88.9361, 15.1596, 51.0561, -51.8174, -5.12642, 85.9325, 24.7199, -20.7402, -79.9651, -3.0576, -12.7093, 21.365, 1.84286, 32.2113, -64.2544, 3.75881, 118.242, 25.2608, -7.14816, -80.0111, -4.98215, -5.62966, 5.87671, 1.75736, -14.1307, -37.3356, -4.16128, 104.396, 17.7174, -25.0244, 60.712, -3.09113, -24.2773, 78.9588, 4.79206, -11.2394, 58.2333, 3.51056, 119.132, 0.756165, -32.4727, 55.6801, -2.76109, -15.5004, 109.952, -8.23926, 0.667495, 52.2196, 11.0107, 101.913, 41.1371, -21.7641, 9.19744, -8.64182, -23.7153, -19.8293, 2.72503, 114.698, -69.3622, -9.50923, 106.43, Average Score: 1436 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 31.3688, -50.4918, -6.63329, -8.69178, -53.0881, 85.5525, 17.5914, 56.3359, -48.3326, -10, 85.4058, 28.8295, -24.886, -87.3755, -6.02029, -13.4898, 19.751, 3.63891, 36.6958, -57.4144, 5.84897, 122, 27.4501, -5.44584, -87.0259, -8.25226, -4.69349, 3.20216, 6.29296, -14.1006, -42.7995, -1.72945, 113.283, 10.5694, -23.9924, 56.3906, -9.04217, -22.8378, 74.1683, 5.65949, -14.4146, 59.7722, 3.76761, 122, 3.20477, -34.3556, 53.9617, -4.58135, -21.5425, 107.763, -9.2919, 2.10331, 45.8236, 12.0812, 108.842, 40.9628, -14.8271, 8.28632, -14.0382, -23.8862, -16.9892, 6.81105, 122, -70.0997, -9.42667, 107.974, trial: 0, score: 3125 trial: 1, score: 1533 trial: 2, score: 1693 trial: 3, score: 2719 trial: 4, score: 1534 Policy 1: 31.3688, -50.4918, -6.63329, -8.69178, -53.0881, 85.5525, 17.5914, 56.3359, -48.3326, -10, 85.4058, 28.8295, -24.886, -87.3755, -6.02029, -13.4898, 19.751, 3.63891, 36.6958, -57.4144, 5.84897, 122, 27.4501, -5.44584, -87.0259, -8.25226, -4.69349, 3.20216, 6.29296, -14.1006, -42.7995, -1.72945, 113.283, 10.5694, -23.9924, 56.3906, -9.04217, -22.8378, 74.1683, 5.65949, -14.4146, 59.7722, 3.76761, 122, 3.20477, -34.3556, 53.9617, -4.58135, -21.5425, 107.763, -9.2919, 2.10331, 45.8236, 12.0812, 108.842, 40.9628, -14.8271, 8.28632, -14.0382, -23.8862, -16.9892, 6.81105, 122, -70.0997, -9.42667, 107.974, Average Score: 2120.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.8923, -55.8101, -8.88653, -6.25018, -53.7008, 87.9146, 11.3838, 51.9348, -53.3874, -9.24087, 83.594, 26.2718, -24.5383, -82.0675, -8.11092, -10.7945, 16.1193, 0.205185, 31.631, -55.7054, 11.454, 113.232, 22.2123, -2.79112, -83.7097, -12.5974, -5.58023, 5.77271, 7.49974, -13.2337, -33.6878, -3.19915, 108.697, 17.0172, -26.6256, 51.4566, -4.35597, -24.8124, 73.3057, 3.35114, -8.97055, 55.321, 5.50642, 117.689, 0.0392097, -29.2239, 55.1031, -10.8068, -24.0737, 109.009, -9.59291, 4.90807, 50.2514, 5.64025, 105.768, 35.5264, -16.9063, 16.986, -5.98095, -19.5506, -13.0983, 7.11505, 115.348, -67.0292, -8.08703, 104.61, trial: 0, score: 1854 trial: 1, score: 2198 trial: 2, score: 2109 trial: 3, score: 2077 trial: 4, score: 1590 Policy 1: 27.8923, -55.8101, -8.88653, -6.25018, -53.7008, 87.9146, 11.3838, 51.9348, -53.3874, -9.24087, 83.594, 26.2718, -24.5383, -82.0675, -8.11092, -10.7945, 16.1193, 0.205185, 31.631, -55.7054, 11.454, 113.232, 22.2123, -2.79112, -83.7097, -12.5974, -5.58023, 5.77271, 7.49974, -13.2337, -33.6878, -3.19915, 108.697, 17.0172, -26.6256, 51.4566, -4.35597, -24.8124, 73.3057, 3.35114, -8.97055, 55.321, 5.50642, 117.689, 0.0392097, -29.2239, 55.1031, -10.8068, -24.0737, 109.009, -9.59291, 4.90807, 50.2514, 5.64025, 105.768, 35.5264, -16.9063, 16.986, -5.98095, -19.5506, -13.0983, 7.11505, 115.348, -67.0292, -8.08703, 104.61, Average Score: 1965.6 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 27.8443, -49.25, -0.584072, -7.83321, -48.9937, 85.1341, 14.158, 55.0713, -55.8498, -6.40562, 84.9858, 30.0611, -27.0516, -85.2193, -2.6647, -10.9845, 20.8554, 7.24431, 37.6208, -57.7083, 9.18068, 120.531, 29.546, -7.46863, -81.35, -11.0393, -10.7743, 4.09065, 7.0949, -10.58, -41.1824, -5.92775, 106.84, 17.0038, -24.7041, 60.4009, -4.05271, -22.347, 73.4286, -2.64638, -8.86734, 57.5733, 3.78791, 122, 4.47862, -32.1116, 51.6975, -9.84451, -22.2065, 110.201, -4.90001, 0.79758, 51.7061, 12.9551, 107.673, 33.3656, -19.4235, 16.7667, -6.32307, -17.4551, -19.8395, -1.72497, 115.072, -66.1383, -5.13526, 106.715, trial: 0, score: 2015 trial: 1, score: 2813 trial: 2, score: 1469 trial: 3, score: 3221 trial: 4, score: 3037 Policy 1: 27.8443, -49.25, -0.584072, -7.83321, -48.9937, 85.1341, 14.158, 55.0713, -55.8498, -6.40562, 84.9858, 30.0611, -27.0516, -85.2193, -2.6647, -10.9845, 20.8554, 7.24431, 37.6208, -57.7083, 9.18068, 120.531, 29.546, -7.46863, -81.35, -11.0393, -10.7743, 4.09065, 7.0949, -10.58, -41.1824, -5.92775, 106.84, 17.0038, -24.7041, 60.4009, -4.05271, -22.347, 73.4286, -2.64638, -8.86734, 57.5733, 3.78791, 122, 4.47862, -32.1116, 51.6975, -9.84451, -22.2065, 110.201, -4.90001, 0.79758, 51.7061, 12.9551, 107.673, 33.3656, -19.4235, 16.7667, -6.32307, -17.4551, -19.8395, -1.72497, 115.072, -66.1383, -5.13526, 106.715, Average Score: 2511 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 26.5021, -51.9594, -1.01135, -4.10322, -48.4721, 86.5034, 10.2928, 48.8957, -51.1841, -9.76933, 80.8018, 28.3399, -25.6069, -86.2915, -11.2107, -5.60559, 15.6819, -0.336887, 35.1981, -62.6951, 2.62861, 112.722, 29.099, -5.58073, -87.7367, -13.0822, -13.2359, -2.16867, 2.09334, -11.605, -39.3294, -10, 109.925, 8.50345, -24.8942, 58.1845, -6.30174, -22.6511, 79.7574, 5.26852, -12.5898, 56.5263, -1.17816, 117.074, 0, -26.8874, 54.0777, -3.30329, -20.4372, 111.094, -4.06027, -0.666971, 43.4665, 10.8958, 109.384, 39.5005, -22.2222, 7.77877, -7.018, -20.4173, -17.9879, -0.675182, 113.193, -71.4987, -10, 107.198, trial: 0, score: 1720 trial: 1, score: 1632 trial: 2, score: 1878 trial: 3, score: 4543 trial: 4, score: 1949 Policy 1: 26.5021, -51.9594, -1.01135, -4.10322, -48.4721, 86.5034, 10.2928, 48.8957, -51.1841, -9.76933, 80.8018, 28.3399, -25.6069, -86.2915, -11.2107, -5.60559, 15.6819, -0.336887, 35.1981, -62.6951, 2.62861, 112.722, 29.099, -5.58073, -87.7367, -13.0822, -13.2359, -2.16867, 2.09334, -11.605, -39.3294, -10, 109.925, 8.50345, -24.8942, 58.1845, -6.30174, -22.6511, 79.7574, 5.26852, -12.5898, 56.5263, -1.17816, 117.074, 0, -26.8874, 54.0777, -3.30329, -20.4372, 111.094, -4.06027, -0.666971, 43.4665, 10.8958, 109.384, 39.5005, -22.2222, 7.77877, -7.018, -20.4173, -17.9879, -0.675182, 113.193, -71.4987, -10, 107.198, Average Score: 2344.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 24.4712, -53.3284, -7.91452, -7.31564, -49.8739, 84.8479, 14.6292, 47.2757, -47.7498, -7.29984, 85.6956, 24.3781, -20.7727, -84.728, -5.04903, -12.5359, 15.6471, 4.43168, 31.4787, -55.9871, 9.02035, 114.203, 22.0382, -2.25058, -88, -5.01393, -12.2641, 3.78554, 5.2152, -13.1142, -36.2332, -4.54721, 105.611, 12.4149, -22.0589, 55.1893, -2.61672, -23.6616, 77.1321, 0.41563, -15.5971, 53.8058, 2.26698, 118.306, 4.61678, -26.0319, 56.6386, -11.5596, -21.2729, 112.461, -7.30503, -3.54165, 47.9062, 10.7479, 110.89, 41.5346, -21.8551, 15.6002, -7.07448, -23.2881, -14.2404, 1.49979, 116.67, -64.444, -10, 102.782, trial: 0, score: 2495 trial: 1, score: 1974 trial: 2, score: 2109 trial: 3, score: 2237 trial: 4, score: 2175 Policy 1: 24.4712, -53.3284, -7.91452, -7.31564, -49.8739, 84.8479, 14.6292, 47.2757, -47.7498, -7.29984, 85.6956, 24.3781, -20.7727, -84.728, -5.04903, -12.5359, 15.6471, 4.43168, 31.4787, -55.9871, 9.02035, 114.203, 22.0382, -2.25058, -88, -5.01393, -12.2641, 3.78554, 5.2152, -13.1142, -36.2332, -4.54721, 105.611, 12.4149, -22.0589, 55.1893, -2.61672, -23.6616, 77.1321, 0.41563, -15.5971, 53.8058, 2.26698, 118.306, 4.61678, -26.0319, 56.6386, -11.5596, -21.2729, 112.461, -7.30503, -3.54165, 47.9062, 10.7479, 110.89, 41.5346, -21.8551, 15.6002, -7.07448, -23.2881, -14.2404, 1.49979, 116.67, -64.444, -10, 102.782, Average Score: 2198 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 28.9896, -48.2849, -6.93401, -5.33876, -50.6436, 88.1153, 16.9786, 51.6193, -54.4223, -10, 85.7591, 30.1586, -24.2238, -78.5363, -3.32728, -7.07227, 14.1789, 6.69159, 33.9981, -59.5339, 5.32073, 112.745, 26.1298, -10.751, -82.813, -12.6515, -11.6658, 1.71748, 2.45002, -10.44, -41.3532, -2.87857, 113.547, 13.6049, -24.2852, 55.8212, -7.96292, -19.8492, 70.7711, 4.9189, -14.8582, 57.8251, 1.65073, 118.975, 3.00425, -26.6815, 51.5998, -10.1717, -16.5341, 111.787, -0.25002, 3.66694, 46.0526, 6.77471, 101.331, 37.1149, -17.2718, 8.38057, -14.0262, -20.1499, -17.1641, 4.15941, 117.108, -67.0026, -10, 108.843, trial: 0, score: 5110 trial: 1, score: 4573 trial: 2, score: 4093 trial: 3, score: 4887 trial: 4, score: 4253 Policy 1: 28.9896, -48.2849, -6.93401, -5.33876, -50.6436, 88.1153, 16.9786, 51.6193, -54.4223, -10, 85.7591, 30.1586, -24.2238, -78.5363, -3.32728, -7.07227, 14.1789, 6.69159, 33.9981, -59.5339, 5.32073, 112.745, 26.1298, -10.751, -82.813, -12.6515, -11.6658, 1.71748, 2.45002, -10.44, -41.3532, -2.87857, 113.547, 13.6049, -24.2852, 55.8212, -7.96292, -19.8492, 70.7711, 4.9189, -14.8582, 57.8251, 1.65073, 118.975, 3.00425, -26.6815, 51.5998, -10.1717, -16.5341, 111.787, -0.25002, 3.66694, 46.0526, 6.77471, 101.331, 37.1149, -17.2718, 8.38057, -14.0262, -20.1499, -17.1641, 4.15941, 117.108, -67.0026, -10, 108.843, Average Score: 4583.2 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 22.3371, -53.0507, -2.84342, -4.2514, -46.8732, 86.6185, 10.8941, 50.6568, -54.7235, -10, 78.7983, 31.0156, -19.1799, -84.4171, -4.39284, -6.02753, 16.023, -0.41626, 31.9233, -59.2222, 9.67976, 120.857, 24.6203, -3.99659, -85.356, -8.50567, -4.46906, 0.19341, -0.29564, -9.33731, -39.6942, -1.78846, 110.313, 17.3732, -22.5204, 52.1344, -3.621, -15.1718, 75.6918, 4.07966, -14.3274, 60.5647, 0.10334, 122, 8.28021, -30.3649, 53.5772, -5.33085, -14.6097, 108.633, -8.52055, -3.20871, 49.7783, 9.09415, 105.025, 37.6869, -14.3177, 10.4979, -11.848, -23.7855, -16.2836, -0.00052404, 122, -67.1459, -3.4983, 101.876, trial: 0, score: 2902 trial: 1, score: 1917 trial: 2, score: 2301 trial: 3, score: 3261 trial: 4, score: 1853 Policy 1: 22.3371, -53.0507, -2.84342, -4.2514, -46.8732, 86.6185, 10.8941, 50.6568, -54.7235, -10, 78.7983, 31.0156, -19.1799, -84.4171, -4.39284, -6.02753, 16.023, -0.41626, 31.9233, -59.2222, 9.67976, 120.857, 24.6203, -3.99659, -85.356, -8.50567, -4.46906, 0.19341, -0.29564, -9.33731, -39.6942, -1.78846, 110.313, 17.3732, -22.5204, 52.1344, -3.621, -15.1718, 75.6918, 4.07966, -14.3274, 60.5647, 0.10334, 122, 8.28021, -30.3649, 53.5772, -5.33085, -14.6097, 108.633, -8.52055, -3.20871, 49.7783, 9.09415, 105.025, 37.6869, -14.3177, 10.4979, -11.848, -23.7855, -16.2836, -0.00052404, 122, -67.1459, -3.4983, 101.876, Average Score: 2446.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 32.0676, -52.3548, -1.57815, -7.76235, -48.2824, 82.7587, 17.1182, 47.3622, -53.5092, -10, 83.5598, 28.5781, -20.6106, -85.7451, -10.3012, -9.4, 18.4729, 5.01746, 38.173, -58.0964, 5.59996, 112.977, 28.3317, -6.07355, -88, -11.4916, -5.89941, 1.88507, 5.47039, -13.2152, -41.3187, -7.82365, 113.137, 12.6232, -22.4777, 51.4579, -4.94311, -24.0131, 70.0352, 4.03506, -12.2169, 59.4066, 5.17323, 118.222, 0.515645, -27.4926, 53.3055, -9.12659, -20.7256, 108.392, -1.25915, 4.02323, 47.507, 12.0711, 104.334, 40.434, -20.7941, 7.89453, -4.9071, -24.201, -15.3945, -1.47839, 116.074, -68.2266, -8.58044, 104.604, trial: 0, score: 1629 trial: 1, score: 3358 trial: 2, score: 3390 trial: 3, score: 2013 trial: 4, score: 2487 Policy 1: 32.0676, -52.3548, -1.57815, -7.76235, -48.2824, 82.7587, 17.1182, 47.3622, -53.5092, -10, 83.5598, 28.5781, -20.6106, -85.7451, -10.3012, -9.4, 18.4729, 5.01746, 38.173, -58.0964, 5.59996, 112.977, 28.3317, -6.07355, -88, -11.4916, -5.89941, 1.88507, 5.47039, -13.2152, -41.3187, -7.82365, 113.137, 12.6232, -22.4777, 51.4579, -4.94311, -24.0131, 70.0352, 4.03506, -12.2169, 59.4066, 5.17323, 118.222, 0.515645, -27.4926, 53.3055, -9.12659, -20.7256, 108.392, -1.25915, 4.02323, 47.507, 12.0711, 104.334, 40.434, -20.7941, 7.89453, -4.9071, -24.201, -15.3945, -1.47839, 116.074, -68.2266, -8.58044, 104.604, Average Score: 2575.4 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 24.7486, -51.7038, -8.22511, -1.44491, -51.2183, 87.0791, 15.1963, 47.4431, -52.4105, -8.72776, 84.8179, 24.9779, -18.8391, -81.5352, -9.61469, -11.1209, 22.8878, 1.55185, 34.5177, -60.3904, 4.11071, 112.273, 28.7136, -7.79334, -81.8945, -12.4881, -10.3686, -2.92701, -1.69319, -11.9333, -33.6716, -10, 113.011, 12.9176, -22.9528, 54.1783, -8.04269, -21.1624, 73.5123, -0.518085, -15.2593, 54.7817, 6.5052, 122, 8.67759, -31.8006, 58.7556, -3.91154, -18.7352, 112.712, -1.98712, 3.19013, 52.0694, 3.45548, 104.035, 32.7364, -18.4573, 14.6993, -8.55447, -25.8625, -11.715, 2.12307, 113.833, -63.9753, -7.60908, 102.38, trial: 0, score: 4509 trial: 1, score: 4350 trial: 2, score: 4061 trial: 3, score: 3711 trial: 4, score: 4253 Policy 1: 24.7486, -51.7038, -8.22511, -1.44491, -51.2183, 87.0791, 15.1963, 47.4431, -52.4105, -8.72776, 84.8179, 24.9779, -18.8391, -81.5352, -9.61469, -11.1209, 22.8878, 1.55185, 34.5177, -60.3904, 4.11071, 112.273, 28.7136, -7.79334, -81.8945, -12.4881, -10.3686, -2.92701, -1.69319, -11.9333, -33.6716, -10, 113.011, 12.9176, -22.9528, 54.1783, -8.04269, -21.1624, 73.5123, -0.518085, -15.2593, 54.7817, 6.5052, 122, 8.67759, -31.8006, 58.7556, -3.91154, -18.7352, 112.712, -1.98712, 3.19013, 52.0694, 3.45548, 104.035, 32.7364, -18.4573, 14.6993, -8.55447, -25.8625, -11.715, 2.12307, 113.833, -63.9753, -7.60908, 102.38, Average Score: 4176.8 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 23.0289, -57.8413, -7.2665, -5.31428, -55, 83.3843, 10.7056, 50.3482, -51.4371, -10, 79.9925, 29.9869, -25.1986, -85.6749, -11.1042, -9.22158, 20.3368, 3.17614, 39.2764, -55.4051, 4.36048, 115.222, 22.7172, -9.02064, -86.0805, -6.52021, -11.7792, -2.12934, 7.37237, -6.23307, -34.4761, -8.44103, 108.29, 17.0511, -23.1038, 55.4693, -4.7122, -21.5474, 78.6052, 0.750348, -18.2941, 61.8756, 5.3272, 119.816, 1.63996, -32.2019, 54.7856, -2.82994, -19.0494, 107.087, -3.62687, 4.12491, 49.3496, 6.0161, 107.688, 37.4391, -15.1519, 7.32439, -11.4343, -24.734, -11.8133, 4.87852, 119.554, -71.9128, -8.00882, 101.268, trial: 0, score: 4605 trial: 1, score: 5215 trial: 2, score: 4374 trial: 3, score: 3741 trial: 4, score: 4125 Policy 1: 23.0289, -57.8413, -7.2665, -5.31428, -55, 83.3843, 10.7056, 50.3482, -51.4371, -10, 79.9925, 29.9869, -25.1986, -85.6749, -11.1042, -9.22158, 20.3368, 3.17614, 39.2764, -55.4051, 4.36048, 115.222, 22.7172, -9.02064, -86.0805, -6.52021, -11.7792, -2.12934, 7.37237, -6.23307, -34.4761, -8.44103, 108.29, 17.0511, -23.1038, 55.4693, -4.7122, -21.5474, 78.6052, 0.750348, -18.2941, 61.8756, 5.3272, 119.816, 1.63996, -32.2019, 54.7856, -2.82994, -19.0494, 107.087, -3.62687, 4.12491, 49.3496, 6.0161, 107.688, 37.4391, -15.1519, 7.32439, -11.4343, -24.734, -11.8133, 4.87852, 119.554, -71.9128, -8.00882, 101.268, Average Score: 4412 --------------------------------- New Iteration Current Best Policy: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Current Best Policy Score: 7594.6 Policy 0 will be: 27.091, -53.0178, -4.32207, -4.91725, -51.4294, 86.4038, 13.7953, 51.8714, -51.351, -10, 81.404, 28.4013, -22.3358, -83.1791, -6.60788, -9.0707, 18.1552, 2.3162, 36.2648, -59.5264, 6.71769, 117.264, 24.8442, -6.55966, -84.3469, -9.08653, -8.32782, 1.7034, 2.65438, -10.6303, -38.0787, -6.54586, 108.603, 12.9549, -25.7587, 55.9513, -6.86561, -20.1025, 74.9651, 2.15179, -13.3623, 58.596, 2.34189, 122, 4.63933, -30.4272, 54.4298, -7.02545, -19.1494, 109.173, -4.67671, 1.38438, 47.7961, 8.21436, 106.215, 36.9916, -17.6125, 12.3087, -9.88911, -22.3103, -15.6013, 3.03753, 118.031, -67.6504, -8.26473, 106.045, Policy 1 will be: 29.2399, -55.3524, -2.55983, -2.4895, -47.249, 81.7841, 17.0163, 49.9729, -55.3537, -5.07121, 80.1692, 23.7678, -26.9203, -83.5018, -9.35658, -6.94038, 14.4359, 0.954835, 41.0324, -61.3848, 5.71413, 122, 26.8756, -3.65593, -79.4814, -6.1304, -10.2171, 2.63192, 0.215099, -14.8444, -34.6025, -8.46804, 108.545, 10.6628, -21.5654, 52.9895, -7.06079, -22.5501, 70.323, 4.62443, -8.87843, 59.7383, 0.924319, 122, 6.95787, -30.6303, 54.6608, -11.3754, -22.0942, 111.229, -5.30662, 5.08107, 43.7654, 9.47399, 108.047, 35.7151, -18.9171, 8.42645, -13.808, -24.3253, -13.0266, 0.957916, 122, -64.8399, -4.58868, 102.899, trial: 0, score: 2719 trial: 1, score: 3581 trial: 2, score: 3901 trial: 3, score: 4022