New Algorithm, initial policy: 25, -61, 0, 50, -3, 33, -4, 91, -43, 0, 127, 25, -80, 0, 44, -6, 11, 0, 49, -34, 0, 126, 25, 0, 0, 0, -35, 16, 0, 37, -8, 0, 121, 25, -40, 0, -11, 0, 44, 0, 66, -21, 0, 126, 25, -50, 0, -11, 0, 50, 0, 63, -22, 0, 125, 25, -63, 0, -11, 0, 67, 0, 62, -22, 0, 125, trial: 0, score: 0 trial: 1, score: 0 Policy 0: 25, -61, 0, 45, -3, 33, -4, 91, -43, 0, 122, 25, -75, 0, 44, -6, 11, 0, 49, -34, 0, 122, 25, 0, 0, 0, -35, 16, 0, 37, -8, 0, 121, 25, -40, 0, -11, -3, 44, 0, 66, -21, 0, 122, 25, -50, 0, -11, -3, 50, 0, 63, -22, 0, 122, 25, -63, 0, -11, -3, 67, 0, 62, -22, 0, 122, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 15.5397, -66.7838, 0.0180338, 45, -5.04836, 24.4437, -4.54234, 88.508, -52.0273, -7.51388, 121.868, 26.522, -75, -6.39136, 45, -3, 13.155, 5.07083, 58.3045, -36.3782, 1.79884, 122, 15.8592, -1.2001, -0.210742, 0.119898, -34.429, 3.97199, 3.59264, 39.4548, -24.4432, -5.78606, 120.591, 19.8084, -34.935, 1.18491, -11.8802, -3, 66.4922, 2.30916, 61.4379, -39.7778, 3.54136, 120.069, 29.9424, -52.2991, 11.5267, -10.2776, -3, 38.7821, -7.43486, 62.9742, -37.8118, 1.21255, 122, 28.6847, -57.7494, -17.0596, -14.3439, -3, 56.0249, 3.44604, 58.8782, -29.6162, -1.80932, 122, Average Score: 0 trial: 0, score: 2 trial: 1, score: 0 Policy 2: 16.6793, -61.1612, -1.50473, 45, -3, 14.9081, 2.28197, 82.9444, -29.8266, -5.20122, 122, 21.6848, -75, 9.76725, 41.097, -3.34356, -6.48694, -6.85075, 45.7912, -32.5904, 2.57218, 122, 20.7455, 0, 1.06783, -5.79688, -37.4481, 13.9387, -8.56931, 40.2323, 14.5987, -0.638098, 122, 32.7428, -38.8493, -4.73086, -15, -3, 56.9685, 6.88121, 78.0733, -16.9466, 7.32576, 122, 18.771, -51.3206, -14.3572, -5.99061, -3, 74.2624, 5.73362, 77.1328, -32.4325, 1.276, 111.17, 17.8274, -56.1454, -0.621512, -9.72528, -3, 58.7413, -8.31706, 63.4955, -39.262, -6.31666, 122, Average Score: 1 trial: 0, score: 1 trial: 1, score: 0 Policy 3: 23.6712, -59.8079, 7.70861, 45, -3, 43.4, -10, 91.1497, -47.0644, -5.96312, 121.342, 27.4648, -75, 2.6852, 42.8527, -7.60181, 25.5827, 9.10689, 35.3646, -10.8463, 1.91919, 122, 25.8087, -2.28162, -5.852, 5.55162, -30.4266, -7.74858, -9.36234, 49.9579, -30.6975, 8.72672, 122, 33.0632, -42.6446, -5.13666, -14.6877, -3, 63.1649, -2.29951, 56.5013, -10.5591, 0.79299, 122, 32.8017, -46.8405, 2.36434, -15, -4.72232, 58.9714, 2.27855, 74.689, 2.06026, 5.26196, 122, 17.9501, -66.6541, 8.46185, -7.8124, -3, 88.8989, 8.15387, 74.4099, 1.09916, 0.20151, 112.543, Average Score: 0.5 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 25.1157, -59.6618, 8.97289, 44.8014, -3, 25.0572, 4.98237, 104.692, -39.5182, 3.44512, 122, 29.9284, -75, 3.23902, 39.705, -5.10089, 3.94376, 7.3975, 50.9064, -50.2715, 7.19017, 122, 31.8202, -4.69381, 4.7299, 4.3551, -37.2499, 9.08481, -4.66766, 28.6693, -26.7135, 5.12045, 114.3, 27.9412, -34.3887, 0.929828, -11.6813, -3, 33.9182, 0.781105, 76.4891, -19.698, 9.4809, 122, 16.7566, -48.696, -15.9872, -13.941, -5.61884, 45.0042, -2.76928, 57.8902, -10.9413, -2.96904, 116.98, 31.4621, -58.6546, 16.0446, -9.56757, -3, 45.2413, 4.19804, 64.6545, -1.16419, 2.12114, 122, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 16.6793, -61.1612, -1.50473, 45, -3, 14.9081, 2.28197, 82.9444, -29.8266, -5.20122, 122, 21.6848, -75, 9.76725, 41.097, -3.34356, -6.48694, -6.85075, 45.7912, -32.5904, 2.57218, 122, 20.7455, 0, 1.06783, -5.79688, -37.4481, 13.9387, -8.56931, 40.2323, 14.5987, -0.638098, 122, 32.7428, -38.8493, -4.73086, -15, -3, 56.9685, 6.88121, 78.0733, -16.9466, 7.32576, 122, 18.771, -51.3206, -14.3572, -5.99061, -3, 74.2624, 5.73362, 77.1328, -32.4325, 1.276, 111.17, 17.8274, -56.1454, -0.621512, -9.72528, -3, 58.7413, -8.31706, 63.4955, -39.262, -6.31666, 122, Current Best Policy Score: 1 Policy 0 will be: 16.6793, -61.1612, -1.50473, 45, -3, 14.9081, 2.28197, 82.9444, -29.8266, -5.20122, 122, 21.6848, -75, 9.76725, 41.097, -3.34356, -6.48694, -6.85075, 45.7912, -32.5904, 2.57218, 122, 20.7455, 0, 1.06783, -5.79688, -37.4481, 13.9387, -8.56931, 40.2323, 14.5987, -0.638098, 122, 32.7428, -38.8493, -4.73086, -15, -3, 56.9685, 6.88121, 78.0733, -16.9466, 7.32576, 122, 18.771, -51.3206, -14.3572, -5.99061, -3, 74.2624, 5.73362, 77.1328, -32.4325, 1.276, 111.17, 17.8274, -56.1454, -0.621512, -9.72528, -3, 58.7413, -8.31706, 63.4955, -39.262, -6.31666, 122, Policy 1 will be: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Policy 2 will be: 21.2435, -58.4655, 13.9038, 45, -3, 6.36047, 4.76786, 92.8761, -23.7699, 0.26661, 122, 27.8338, -75, 10.5339, 41.2155, -3, -29.7628, -10, 48.3104, -50.2869, 10.5031, 114.636, 21.9745, -4.21594, 9.19487, -9.26758, -42.6235, 26.1359, -10, 46.2686, -9.04041, -8.27534, 109.698, 37.5562, -36.0526, -3.41605, -15, -3, 64.6013, 3.39598, 81.1668, -0.452337, 4.0643, 114.911, 14.753, -56.2089, -20.127, -11.7498, -3, 61.8836, 3.85134, 88.2336, -33.2956, 9.71137, 108.644, 25.2768, -53.1493, -17.3279, -5.98419, -4.35534, 78.6586, -10, 62.4041, -33.404, -4.87344, 122, Policy 3 will be: 14.5225, -61.5455, -8.65673, 45, -8.2341, -3.92979, -5.80922, 96.1101, -50.9586, 0.936094, 112.709, 27.8478, -68.3321, -3.03185, 35.568, -3, -13.2713, -10, 41.8007, -28.1689, -4.03092, 111.543, 16.3539, -0.241609, 18.1475, -5.99976, -37.0514, 23.875, -10, 35.2719, 22.3885, -9.95732, 118.287, 42.5517, -40.3999, 3.96604, -14.6008, -3.54336, 48.8489, 12.1448, 69.2231, -6.89759, 12.7245, 112.828, 28.1991, -56.314, -16.5411, -11.9624, -3, 56.3858, 1.78395, 67.4968, -22.1877, -6.5984, 122, 18.9477, -58.1482, 15.2849, -6.1313, -3, 40.1866, -10, 55.5131, -56.6964, -9.63162, 122, Policy 4 will be: 7.19339, -58.1264, -18.4909, 44.4051, -5.95118, -9.05667, 5.30748, 81.4493, -40.7056, -3.89412, 111.06, 15.7558, -67.6557, -0.488741, 42.8995, -7.41167, -25.8211, -10, 40.0617, -16.0462, -5.12773, 122, 29.8806, 0, -12.8542, -3.46909, -40.539, 36.9756, -2.1533, 54.0605, 17.3857, -7.04076, 122, 28.6934, -33.7889, -16.1854, -10.8417, -7.94557, 57.9615, 2.81769, 81.3851, -13.5787, -1.36599, 111.177, 28.5551, -44.5651, -15.5232, -7.53566, -3, 68.8938, 15.0505, 87.2299, -47.8779, 10.8154, 122, 12.6828, -60.5772, -1.92744, -10.0497, -3, 38.3174, -10, 54.9355, -61.3976, -8.6982, 111.384, trial: 0, score: 2 trial: 1, score: 2 Policy 0: 16.6793, -61.1612, -1.50473, 45, -3, 14.9081, 2.28197, 82.9444, -29.8266, -5.20122, 122, 21.6848, -75, 9.76725, 41.097, -3.34356, -6.48694, -6.85075, 45.7912, -32.5904, 2.57218, 122, 20.7455, 0, 1.06783, -5.79688, -37.4481, 13.9387, -8.56931, 40.2323, 14.5987, -0.638098, 122, 32.7428, -38.8493, -4.73086, -15, -3, 56.9685, 6.88121, 78.0733, -16.9466, 7.32576, 122, 18.771, -51.3206, -14.3572, -5.99061, -3, 74.2624, 5.73362, 77.1328, -32.4325, 1.276, 111.17, 17.8274, -56.1454, -0.621512, -9.72528, -3, 58.7413, -8.31706, 63.4955, -39.262, -6.31666, 122, Average Score: 2 trial: 0, score: 6 trial: 1, score: 4 Policy 1: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Average Score: 5 trial: 0, score: 2 trial: 1, score: 1 Policy 2: 21.2435, -58.4655, 13.9038, 45, -3, 6.36047, 4.76786, 92.8761, -23.7699, 0.26661, 122, 27.8338, -75, 10.5339, 41.2155, -3, -29.7628, -10, 48.3104, -50.2869, 10.5031, 114.636, 21.9745, -4.21594, 9.19487, -9.26758, -42.6235, 26.1359, -10, 46.2686, -9.04041, -8.27534, 109.698, 37.5562, -36.0526, -3.41605, -15, -3, 64.6013, 3.39598, 81.1668, -0.452337, 4.0643, 114.911, 14.753, -56.2089, -20.127, -11.7498, -3, 61.8836, 3.85134, 88.2336, -33.2956, 9.71137, 108.644, 25.2768, -53.1493, -17.3279, -5.98419, -4.35534, 78.6586, -10, 62.4041, -33.404, -4.87344, 122, Average Score: 1.5 trial: 0, score: 0 trial: 1, score: 1 Policy 3: 14.5225, -61.5455, -8.65673, 45, -8.2341, -3.92979, -5.80922, 96.1101, -50.9586, 0.936094, 112.709, 27.8478, -68.3321, -3.03185, 35.568, -3, -13.2713, -10, 41.8007, -28.1689, -4.03092, 111.543, 16.3539, -0.241609, 18.1475, -5.99976, -37.0514, 23.875, -10, 35.2719, 22.3885, -9.95732, 118.287, 42.5517, -40.3999, 3.96604, -14.6008, -3.54336, 48.8489, 12.1448, 69.2231, -6.89759, 12.7245, 112.828, 28.1991, -56.314, -16.5411, -11.9624, -3, 56.3858, 1.78395, 67.4968, -22.1877, -6.5984, 122, 18.9477, -58.1482, 15.2849, -6.1313, -3, 40.1866, -10, 55.5131, -56.6964, -9.63162, 122, Average Score: 0.5 trial: 0, score: 1 trial: 1, score: 3 Policy 4: 7.19339, -58.1264, -18.4909, 44.4051, -5.95118, -9.05667, 5.30748, 81.4493, -40.7056, -3.89412, 111.06, 15.7558, -67.6557, -0.488741, 42.8995, -7.41167, -25.8211, -10, 40.0617, -16.0462, -5.12773, 122, 29.8806, 0, -12.8542, -3.46909, -40.539, 36.9756, -2.1533, 54.0605, 17.3857, -7.04076, 122, 28.6934, -33.7889, -16.1854, -10.8417, -7.94557, 57.9615, 2.81769, 81.3851, -13.5787, -1.36599, 111.177, 28.5551, -44.5651, -15.5232, -7.53566, -3, 68.8938, 15.0505, 87.2299, -47.8779, 10.8154, 122, 12.6828, -60.5772, -1.92744, -10.0497, -3, 38.3174, -10, 54.9355, -61.3976, -8.6982, 111.384, Average Score: 2 --------------------------------- New Iteration Current Best Policy: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Current Best Policy Score: 5 Policy 0 will be: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Policy 1 will be: 20.7862, -59.211, 11.9601, 45, -4.94455, 17.8134, -4.07063, 94.2706, -41.3799, -10, 122, 15.5554, -73.0609, 7.67359, 36.0159, -3, -0.66673, 4.33353, 35.5062, 1.14111, 3.6737, 115.174, 28.2643, 0, -1.88196, -7.07644, -44.9198, -14.2151, -3.24981, 54.9123, 8.18348, -2.51727, 122, 23.467, -35.5681, -16.2154, -15, -5.78469, 97.1638, 7.71097, 77.7258, -39.668, 8.65556, 116.707, 19.0135, -57.5291, -25.6424, 1.28102, -3, 46.6354, -8.63033, 82.7287, -32.1532, -10, 122, 19.8788, -58.302, -21.2759, -14.384, -5.27167, 52.4006, -1.63444, 65.656, -18.7899, 5.40899, 122, Policy 2 will be: 22.2597, -55.7938, -15.6275, 45, -9.74367, 27.9561, 10.5508, 106.653, -44.041, 3.72497, 113.192, 18.4249, -71.827, 29.9131, 45, -4.17576, 15.8013, 1.55498, 42.0734, -0.282409, 7.47308, 100.294, 29.7696, -1.59667, 5.66522, -1.19881, -40.2901, -12.6826, -10, 35.1342, 38.4558, -10, 117.94, 30.7534, -37.653, -1.95331, -14.1635, -6.92019, 82.6233, 2.79169, 97.2325, -24.0974, -2.18052, 102.982, 30.5847, -50.7077, -13.0762, 2.72037, -3, 64.3655, 2.24212, 74.9863, -27.9423, 6.00614, 122, 28.416, -53.0295, -5.6559, -15, -3, 38.2209, -10, 43.6148, -60.6474, 2.61502, 122, Policy 3 will be: 14.8732, -57.1192, -17.4813, 44.4371, -3.64701, 51.1678, 0.245045, 82.0211, -42.4611, -7.96686, 122, 23.3966, -73.5161, 17.9165, 42.327, -3, -14.6825, 7.65616, 37.1511, -14.2411, 2.69918, 104.77, 18.67, 0, 2.16902, -4.58415, -44.6919, -27.5521, -9.36127, 44.5686, 45.0433, -5.498, 120.612, 17.3697, -39.5492, -4.37306, -15, -6.35242, 93.2753, 2.07913, 99.9395, -17.3175, -7.67968, 118.341, 30.9769, -52.5518, -27.1733, 1.62503, -3.61488, 67.6847, 6.06213, 71.9163, -43.1284, 5.49398, 122, 23.9014, -64.7974, -16.8261, -15, -3, 72.2482, -7.65611, 45.0495, -38.6233, -3.87385, 122, Policy 4 will be: 20.2863, -51.9089, 8.62537, 39.7196, -6.43324, 50.0294, 10.8894, 82.4392, -19.6503, 4.17839, 101.893, 16.5311, -71.5145, 23.0631, 43.6155, -3, -2.4012, -1.30603, 37.0564, -7.66233, 1.75513, 101.187, 26.0248, -4.24723, 10.9441, -6.96008, -36.5046, -26.1411, -6.16355, 38.4965, 40.6256, -10, 122, 29.7526, -33.0304, -4.67911, -11.7768, -6.53137, 96.8142, 13.4235, 83.1042, -13.688, 9.21114, 120.591, 16.7995, -51.9372, -5.60598, -6.29429, -4.67117, 71.2788, 1.08153, 87.4366, -34.4229, 5.64053, 122, 30.1827, -63.3008, -8.62476, -15, -8.69208, 62.4073, -10, 57.4366, -39.8664, 0.689033, 122, trial: 0, score: 5 trial: 1, score: 4 Policy 0: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Average Score: 4.5 trial: 0, score: 3 trial: 1, score: 4 Policy 1: 20.7862, -59.211, 11.9601, 45, -4.94455, 17.8134, -4.07063, 94.2706, -41.3799, -10, 122, 15.5554, -73.0609, 7.67359, 36.0159, -3, -0.66673, 4.33353, 35.5062, 1.14111, 3.6737, 115.174, 28.2643, 0, -1.88196, -7.07644, -44.9198, -14.2151, -3.24981, 54.9123, 8.18348, -2.51727, 122, 23.467, -35.5681, -16.2154, -15, -5.78469, 97.1638, 7.71097, 77.7258, -39.668, 8.65556, 116.707, 19.0135, -57.5291, -25.6424, 1.28102, -3, 46.6354, -8.63033, 82.7287, -32.1532, -10, 122, 19.8788, -58.302, -21.2759, -14.384, -5.27167, 52.4006, -1.63444, 65.656, -18.7899, 5.40899, 122, Average Score: 3.5 trial: 0, score: 1 trial: 1, score: 2 Policy 2: 22.2597, -55.7938, -15.6275, 45, -9.74367, 27.9561, 10.5508, 106.653, -44.041, 3.72497, 113.192, 18.4249, -71.827, 29.9131, 45, -4.17576, 15.8013, 1.55498, 42.0734, -0.282409, 7.47308, 100.294, 29.7696, -1.59667, 5.66522, -1.19881, -40.2901, -12.6826, -10, 35.1342, 38.4558, -10, 117.94, 30.7534, -37.653, -1.95331, -14.1635, -6.92019, 82.6233, 2.79169, 97.2325, -24.0974, -2.18052, 102.982, 30.5847, -50.7077, -13.0762, 2.72037, -3, 64.3655, 2.24212, 74.9863, -27.9423, 6.00614, 122, 28.416, -53.0295, -5.6559, -15, -3, 38.2209, -10, 43.6148, -60.6474, 2.61502, 122, Average Score: 1.5 trial: 0, score: 3 trial: 1, score: 2 Policy 3: 14.8732, -57.1192, -17.4813, 44.4371, -3.64701, 51.1678, 0.245045, 82.0211, -42.4611, -7.96686, 122, 23.3966, -73.5161, 17.9165, 42.327, -3, -14.6825, 7.65616, 37.1511, -14.2411, 2.69918, 104.77, 18.67, 0, 2.16902, -4.58415, -44.6919, -27.5521, -9.36127, 44.5686, 45.0433, -5.498, 120.612, 17.3697, -39.5492, -4.37306, -15, -6.35242, 93.2753, 2.07913, 99.9395, -17.3175, -7.67968, 118.341, 30.9769, -52.5518, -27.1733, 1.62503, -3.61488, 67.6847, 6.06213, 71.9163, -43.1284, 5.49398, 122, 23.9014, -64.7974, -16.8261, -15, -3, 72.2482, -7.65611, 45.0495, -38.6233, -3.87385, 122, Average Score: 2.5 trial: 0, score: 1 trial: 1, score: 5 Policy 4: 20.2863, -51.9089, 8.62537, 39.7196, -6.43324, 50.0294, 10.8894, 82.4392, -19.6503, 4.17839, 101.893, 16.5311, -71.5145, 23.0631, 43.6155, -3, -2.4012, -1.30603, 37.0564, -7.66233, 1.75513, 101.187, 26.0248, -4.24723, 10.9441, -6.96008, -36.5046, -26.1411, -6.16355, 38.4965, 40.6256, -10, 122, 29.7526, -33.0304, -4.67911, -11.7768, -6.53137, 96.8142, 13.4235, 83.1042, -13.688, 9.21114, 120.591, 16.7995, -51.9372, -5.60598, -6.29429, -4.67117, 71.2788, 1.08153, 87.4366, -34.4229, 5.64053, 122, 30.1827, -63.3008, -8.62476, -15, -8.69208, 62.4073, -10, 57.4366, -39.8664, 0.689033, 122, Average Score: 3 --------------------------------- New Iteration Current Best Policy: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Current Best Policy Score: 4.5 Policy 0 will be: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Policy 1 will be: 14.596, -61.5483, -12.6303, 45, -3.3057, 31.5309, 4.87555, 98.809, -39.5945, -2.12187, 122, 29.9956, -68.8337, 13.9227, 43.8593, -4.04002, 9.17154, -0.197872, 52.7683, -38.2176, -1.8001, 111.475, 19.5551, 0, -6.01021, -6.89679, -34.3895, -6.90475, -10, 59.0623, 10.2256, -10, 122, 22.3338, -31.5583, 2.30327, -14.6845, -8.69518, 64.6304, 7.41298, 85.8341, -35.7099, 9.45985, 116.169, 28.8279, -50.0991, -22.3948, -0.712555, -3.01114, 55.6042, -10, 73.135, -41.2579, 0.721486, 122, 32.2504, -53.0002, -21.7434, -8.36455, -4.42039, 39.9902, -7.36476, 55.629, -35.6558, -2.97224, 122, Policy 2 will be: 13.2669, -55.5943, -0.697828, 45, -3, 9.05313, 11.9312, 93.8675, -44.5872, -10, 111.271, 16.0558, -75, 9.96884, 45, -5.69551, 6.54353, 4.47954, 52.2783, 3.27676, 2.94207, 100.691, 14.4757, 0, 21.7506, -11.37, -34.3496, 10.6759, -9.69691, 40.3021, 40.0275, -8.64037, 107.568, 17.9709, -37.6239, -11.5278, -14.3312, -10.4896, 93.6494, 2.42986, 73.6953, -27.0315, -2.67362, 122, 15.9027, -59.2357, -28.6726, -4.1624, -8.48181, 87.3453, 1.06625, 84.6558, -32.5398, 5.5404, 122, 35.3623, -56.2154, -19.0655, -7.83277, -4.2742, 67.8694, -10, 51.4502, -53.9199, 0.0672831, 122, Policy 3 will be: 12.9445, -56.967, 9.87615, 45, -3, 41.4722, -3.55735, 79.6494, -32.5414, 1.11514, 102.593, 15.9385, -73.5261, 13.5677, 36.2814, -4.37802, -20.0132, 8.55917, 47.8114, -40.5436, 14.404, 96.8668, 23.1446, 0, 11.6527, -10.2173, -43.8341, 13.9644, -10, 36.2855, 18.1731, -3.78837, 122, 27.6213, -36.7497, -2.04147, -14.4741, -10.0658, 55.1146, 6.98012, 81.7635, -4.40067, -3.46593, 113.369, 23.4314, -59.7934, -33.7214, -0.790547, -3, 92.8959, -10, 88.1834, -1.92579, 4.18771, 122, 25.3775, -65.1334, -21.172, -15, -4.51295, 40.287, -10, 62.8377, -48.9957, -8.38264, 122, Policy 4 will be: 17.1842, -57.388, 13.1229, 45, -8.17283, 40.0809, 2.89664, 99.4053, -6.19634, 6.56464, 112.36, 26.6013, -75, 17.507, 36.0491, -3, 6.29794, 6.2745, 49.3374, -43.7289, -0.959815, 121.863, 11.0692, 0, 1.07375, -5.39489, -37.7116, 7.55699, -10, 45.7644, 27.1216, 1.22191, 110.118, 21.8071, -38.4428, 1.26088, -15, -4.83881, 70.9528, 13.3265, 77.5031, 3.496, -2.02941, 102.818, 30.7886, -50.4955, -34.9588, -5.31117, -8.39111, 58.6186, 2.93382, 72.0793, -6.12116, 3.67195, 122, 19.2121, -60.1482, -5.15757, -15, -3, 74.4683, -10, 52.3812, -34.1968, -0.0870751, 120.09, trial: 0, score: 4 trial: 1, score: 5 Policy 0: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Average Score: 4.5 trial: 0, score: 1 trial: 1, score: 0 Policy 1: 14.596, -61.5483, -12.6303, 45, -3.3057, 31.5309, 4.87555, 98.809, -39.5945, -2.12187, 122, 29.9956, -68.8337, 13.9227, 43.8593, -4.04002, 9.17154, -0.197872, 52.7683, -38.2176, -1.8001, 111.475, 19.5551, 0, -6.01021, -6.89679, -34.3895, -6.90475, -10, 59.0623, 10.2256, -10, 122, 22.3338, -31.5583, 2.30327, -14.6845, -8.69518, 64.6304, 7.41298, 85.8341, -35.7099, 9.45985, 116.169, 28.8279, -50.0991, -22.3948, -0.712555, -3.01114, 55.6042, -10, 73.135, -41.2579, 0.721486, 122, 32.2504, -53.0002, -21.7434, -8.36455, -4.42039, 39.9902, -7.36476, 55.629, -35.6558, -2.97224, 122, Average Score: 0.5 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 13.2669, -55.5943, -0.697828, 45, -3, 9.05313, 11.9312, 93.8675, -44.5872, -10, 111.271, 16.0558, -75, 9.96884, 45, -5.69551, 6.54353, 4.47954, 52.2783, 3.27676, 2.94207, 100.691, 14.4757, 0, 21.7506, -11.37, -34.3496, 10.6759, -9.69691, 40.3021, 40.0275, -8.64037, 107.568, 17.9709, -37.6239, -11.5278, -14.3312, -10.4896, 93.6494, 2.42986, 73.6953, -27.0315, -2.67362, 122, 15.9027, -59.2357, -28.6726, -4.1624, -8.48181, 87.3453, 1.06625, 84.6558, -32.5398, 5.5404, 122, 35.3623, -56.2154, -19.0655, -7.83277, -4.2742, 67.8694, -10, 51.4502, -53.9199, 0.0672831, 122, Average Score: 0 trial: 0, score: 1 trial: 1, score: 1 Policy 3: 12.9445, -56.967, 9.87615, 45, -3, 41.4722, -3.55735, 79.6494, -32.5414, 1.11514, 102.593, 15.9385, -73.5261, 13.5677, 36.2814, -4.37802, -20.0132, 8.55917, 47.8114, -40.5436, 14.404, 96.8668, 23.1446, 0, 11.6527, -10.2173, -43.8341, 13.9644, -10, 36.2855, 18.1731, -3.78837, 122, 27.6213, -36.7497, -2.04147, -14.4741, -10.0658, 55.1146, 6.98012, 81.7635, -4.40067, -3.46593, 113.369, 23.4314, -59.7934, -33.7214, -0.790547, -3, 92.8959, -10, 88.1834, -1.92579, 4.18771, 122, 25.3775, -65.1334, -21.172, -15, -4.51295, 40.287, -10, 62.8377, -48.9957, -8.38264, 122, Average Score: 1 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 17.1842, -57.388, 13.1229, 45, -8.17283, 40.0809, 2.89664, 99.4053, -6.19634, 6.56464, 112.36, 26.6013, -75, 17.507, 36.0491, -3, 6.29794, 6.2745, 49.3374, -43.7289, -0.959815, 121.863, 11.0692, 0, 1.07375, -5.39489, -37.7116, 7.55699, -10, 45.7644, 27.1216, 1.22191, 110.118, 21.8071, -38.4428, 1.26088, -15, -4.83881, 70.9528, 13.3265, 77.5031, 3.496, -2.02941, 102.818, 30.7886, -50.4955, -34.9588, -5.31117, -8.39111, 58.6186, 2.93382, 72.0793, -6.12116, 3.67195, 122, 19.2121, -60.1482, -5.15757, -15, -3, 74.4683, -10, 52.3812, -34.1968, -0.0870751, 120.09, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Current Best Policy Score: 4.5 Policy 0 will be: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Policy 1 will be: 9.84262, -50.9574, -1.54777, 45, -3.71878, 46.1443, -0.104614, 108.132, -31.6679, -7.4189, 116.976, 27.774, -67.7702, 3.09405, 37.107, -3, -19.2936, 2.72242, 52.8089, -19.9181, 10.7784, 117.89, 10.9752, -5.57259, 3.13988, -7.49034, -43.595, 0.0215968, -2.04939, 41.2769, 40.1083, -10, 115.62, 24.9586, -38.1565, 0.874594, -15, -9.168, 95.9743, 13.8165, 97.6121, -37.1601, 6.37598, 119.651, 27.6594, -50.9957, -12.8429, -6.01173, -3.93804, 71.437, -2.90385, 65.5588, 4.50006, 4.30233, 116.166, 23.1335, -53.7847, -1.6767, -10.9701, -3, 70.1034, -10, 50.8768, -20.477, -10, 122, Policy 2 will be: 25.9691, -53.9753, 6.67171, 39.7799, -3, 47.0528, -4.13224, 100.346, -18.9165, -5.66347, 102.87, 12.1007, -69.2884, 29.4038, 37.2375, -3, 14.1602, -0.491986, 53.6923, 0.13611, 10.693, 118.252, 20.0373, -4.47718, 8.95599, -8.90254, -39.2336, 5.5693, -10, 53.9012, 48.8512, -6.872, 122, 18.5972, -31.5759, -18.652, -15, -3.22242, 59.109, 4.97454, 94.4598, -19.799, -1.9613, 111.685, 23.2609, -59.9465, -18.3208, -4.82721, -3, 53.9243, -8.97693, 62.185, -22.8598, -0.512991, 111.763, 22.6136, -59.2992, -0.591891, -10.8947, -3, 68.4466, -10, 57.2143, -43.4516, -10, 114.657, Policy 3 will be: 23.9359, -60.2163, 8.07399, 45, -3, 23.2798, -2.86827, 102.995, -28.1285, -0.691459, 115.542, 21.4191, -75, 26.897, 37.9459, -7.05019, -19.5408, 3.00106, 31.7652, -34.9925, -0.827965, 118.522, 26.7297, -6.13997, 7.53019, -1.93771, -43.6058, 3.866, -10, 47.8792, 6.18318, -10, 122, 26.3923, -32.4677, 7.779, -15, -10.6194, 82.6643, 5.21146, 82.4257, -37.5166, 4.43918, 117.541, 20.4766, -49.9167, -8.8233, -1.88585, -5.57496, 80.5127, 0.202092, 77.8519, -2.59693, -8.82185, 122, 31.2818, -63.9884, -32.9879, -15, -3, 50.5207, -10, 51.558, -16.4773, -6.33019, 122, Policy 4 will be: 14.9856, -59.5721, -16.3478, 45, -3, 43.6444, -1.74434, 96.1283, -37.0575, -9.84997, 118.127, 17.4156, -72.2137, 12.8008, 40.7414, -3, 16.7448, -4.47357, 29.298, -17.9513, 3.52433, 99.9273, 23.4958, -3.50199, -6.8253, -1.00888, -37.0143, -7.82256, -7.66095, 44.9053, 42.1678, -2.63651, 117.121, 18.861, -29.4353, 5.16297, -14.9557, -10.4615, 81.2913, 13.471, 93.3121, 5.36035, -5.06205, 122, 31.7202, -52.0109, -23.9552, 0.249514, -5.31496, 47.8716, -10, 72.5973, -38.7396, 5.77078, 122, 35.2029, -58.406, -12.2635, -13.8665, -7.63076, 66.4387, -3.16206, 59.7294, -28.8296, 1.34401, 122, trial: 0, score: 2 trial: 1, score: 3 Policy 0: 17.9781, -57.1498, -0.821625, 45, -4.37721, 31.9591, 3.07768, 93.9625, -25.0367, -2.93135, 113.403, 20.7177, -75, 19.7332, 41.866, -3, 1.12098, -0.444786, 42.3114, -20.3643, 7.59237, 107.5, 19.974, 0, 10.4443, -5.39317, -39.2638, -5.62676, -10, 49.6936, 24.7485, -5.75763, 122, 26.4787, -32.1114, -8.1456, -15, -6.44389, 79.3195, 6.98085, 85.7515, -18.7438, 1.96863, 117.38, 24.6055, -53.9319, -22.3901, -1.54567, -3, 68.9273, -3.70188, 74.6093, -18.709, -3.12379, 121.852, 25.4123, -60.4832, -15.516, -13.6659, -3, 54.37, -9.04722, 53.3719, -36.3753, -3.37813, 122, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 9.84262, -50.9574, -1.54777, 45, -3.71878, 46.1443, -0.104614, 108.132, -31.6679, -7.4189, 116.976, 27.774, -67.7702, 3.09405, 37.107, -3, -19.2936, 2.72242, 52.8089, -19.9181, 10.7784, 117.89, 10.9752, -5.57259, 3.13988, -7.49034, -43.595, 0.0215968, -2.04939, 41.2769, 40.1083, -10, 115.62, 24.9586, -38.1565, 0.874594, -15, -9.168, 95.9743, 13.8165, 97.6121, -37.1601, 6.37598, 119.651, 27.6594, -50.9957, -12.8429, -6.01173, -3.93804, 71.437, -2.90385, 65.5588, 4.50006, 4.30233, 116.166, 23.1335, -53.7847, -1.6767, -10.9701, -3, 70.1034, -10, 50.8768, -20.477, -10, 122, Average Score: 0 trial: 0, score: 0 trial: 1, score: 2 Policy 2: 25.9691, -53.9753, 6.67171, 39.7799, -3, 47.0528, -4.13224, 100.346, -18.9165, -5.66347, 102.87, 12.1007, -69.2884, 29.4038, 37.2375, -3, 14.1602, -0.491986, 53.6923, 0.13611, 10.693, 118.252, 20.0373, -4.47718, 8.95599, -8.90254, -39.2336, 5.5693, -10, 53.9012, 48.8512, -6.872, 122, 18.5972, -31.5759, -18.652, -15, -3.22242, 59.109, 4.97454, 94.4598, -19.799, -1.9613, 111.685, 23.2609, -59.9465, -18.3208, -4.82721, -3, 53.9243, -8.97693, 62.185, -22.8598, -0.512991, 111.763, 22.6136, -59.2992, -0.591891, -10.8947, -3, 68.4466, -10, 57.2143, -43.4516, -10, 114.657, Average Score: 1 trial: 0, score: 5 trial: 1, score: 6 Policy 3: 23.9359, -60.2163, 8.07399, 45, -3, 23.2798, -2.86827, 102.995, -28.1285, -0.691459, 115.542, 21.4191, -75, 26.897, 37.9459, -7.05019, -19.5408, 3.00106, 31.7652, -34.9925, -0.827965, 118.522, 26.7297, -6.13997, 7.53019, -1.93771, -43.6058, 3.866, -10, 47.8792, 6.18318, -10, 122, 26.3923, -32.4677, 7.779, -15, -10.6194, 82.6643, 5.21146, 82.4257, -37.5166, 4.43918, 117.541, 20.4766, -49.9167, -8.8233, -1.88585, -5.57496, 80.5127, 0.202092, 77.8519, -2.59693, -8.82185, 122, 31.2818, -63.9884, -32.9879, -15, -3, 50.5207, -10, 51.558, -16.4773, -6.33019, 122, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 14.9856, -59.5721, -16.3478, 45, -3, 43.6444, -1.74434, 96.1283, -37.0575, -9.84997, 118.127, 17.4156, -72.2137, 12.8008, 40.7414, -3, 16.7448, -4.47357, 29.298, -17.9513, 3.52433, 99.9273, 23.4958, -3.50199, -6.8253, -1.00888, -37.0143, -7.82256, -7.66095, 44.9053, 42.1678, -2.63651, 117.121, 18.861, -29.4353, 5.16297, -14.9557, -10.4615, 81.2913, 13.471, 93.3121, 5.36035, -5.06205, 122, 31.7202, -52.0109, -23.9552, 0.249514, -5.31496, 47.8716, -10, 72.5973, -38.7396, 5.77078, 122, 35.2029, -58.406, -12.2635, -13.8665, -7.63076, 66.4387, -3.16206, 59.7294, -28.8296, 1.34401, 122, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 23.9359, -60.2163, 8.07399, 45, -3, 23.2798, -2.86827, 102.995, -28.1285, -0.691459, 115.542, 21.4191, -75, 26.897, 37.9459, -7.05019, -19.5408, 3.00106, 31.7652, -34.9925, -0.827965, 118.522, 26.7297, -6.13997, 7.53019, -1.93771, -43.6058, 3.866, -10, 47.8792, 6.18318, -10, 122, 26.3923, -32.4677, 7.779, -15, -10.6194, 82.6643, 5.21146, 82.4257, -37.5166, 4.43918, 117.541, 20.4766, -49.9167, -8.8233, -1.88585, -5.57496, 80.5127, 0.202092, 77.8519, -2.59693, -8.82185, 122, 31.2818, -63.9884, -32.9879, -15, -3, 50.5207, -10, 51.558, -16.4773, -6.33019, 122, Current Best Policy Score: 5.5 Policy 0 will be: 23.9359, -60.2163, 8.07399, 45, -3, 23.2798, -2.86827, 102.995, -28.1285, -0.691459, 115.542, 21.4191, -75, 26.897, 37.9459, -7.05019, -19.5408, 3.00106, 31.7652, -34.9925, -0.827965, 118.522, 26.7297, -6.13997, 7.53019, -1.93771, -43.6058, 3.866, -10, 47.8792, 6.18318, -10, 122, 26.3923, -32.4677, 7.779, -15, -10.6194, 82.6643, 5.21146, 82.4257, -37.5166, 4.43918, 117.541, 20.4766, -49.9167, -8.8233, -1.88585, -5.57496, 80.5127, 0.202092, 77.8519, -2.59693, -8.82185, 122, 31.2818, -63.9884, -32.9879, -15, -3, 50.5207, -10, 51.558, -16.4773, -6.33019, 122, Policy 1 will be: 18.5443, -56.1208, 8.92192, 45, -3, 42.6197, -3.38092, 103.146, -13.6233, 8.22857, 122, 17.0852, -68.0696, 30.2222, 36.883, -9.53939, -36.514, 6.21251, 29.4923, -26.2007, 6.5903, 118.284, 22.0439, -7.16194, 19.9849, 1.43994, -44.6665, 9.56384, -2.35815, 53.9735, 11.4398, -10, 122, 29.4744, -32.088, 10.5453, -15, -15.6599, 101.701, 1.26019, 91.0211, -54.7298, 12.1598, 107.177, 12.5072, -55.0242, -20.5775, 1.24853, -6.64495, 63.0537, 7.04217, 63.5735, -11.3299, -10, 108.655, 29.2187, -62.3997, -30.1724, -15, -3.50656, 66.6635, -10, 48.8729, -31.0454, -1.50783, 114.086, Policy 2 will be: 31.1629, -62.758, 11.6976, 45, -3, 27.0215, -0.160594, 108.312, -38.7988, -8.84213, 114.236, 20.2769, -75, 15.904, 38.7131, -3.86598, -7.24248, 8.69533, 29.8627, -37.089, -5.50596, 109.688, 35.7755, -13.2542, -4.81136, -6.82016, -41.781, 4.80503, -8.3511, 44.6559, -7.55007, -10, 114.146, 26.0183, -28.7036, -5.6126, -15, -14.113, 77.0934, 3.29251, 69.2612, -20.3086, 6.60083, 104.581, 23.8691, -53.4764, -2.90483, -1.26491, -9.80697, 83.6482, -7.94649, 78.5707, -8.40561, -5.95177, 113.129, 27.6828, -63.9273, -27.1224, -15, -3.50093, 68.2881, -10, 51.5097, -1.05541, -10, 122, Policy 3 will be: 15.6028, -65.5516, 8.68113, 45, -3, 45.8601, 5.19518, 110.337, -44.2005, -7.11493, 109.94, 16.6888, -72.7913, 21.4821, 32.6034, -8.84924, -33.2969, 0.325355, 19.9162, -36.2455, 2.00535, 122, 19.9824, -11.1696, 12.2192, -6.67481, -44.6362, -19.6794, -2.53133, 39.9936, 20.3748, -10, 110.594, 36.2795, -31.4866, 1.74875, -9.86302, -15.5649, 68.7697, -2.78228, 74.0985, -45.3665, 12.3836, 106.467, 15.34, -49.4666, 6.1253, 2.22396, -3, 62.077, -9.46703, 76.428, 20.0462, -10, 122, 25.6868, -59.7846, -29.4866, -15, -3, 69.5765, -1.76148, 61.8265, -40.2633, -2.41666, 122, Policy 4 will be: 21.9485, -57.5411, 12.1689, 43.8979, -5.16688, 37.4416, 2.94399, 115.674, -24.1116, 4.23723, 107.834, 20.8768, -71.5577, 32.9514, 38.7957, -5.41157, -21.6278, 4.45553, 21.629, -10.6121, 8.83223, 115.887, 19.2644, -6.66154, 20.1815, 0.263735, -45.7332, 10.4725, -10, 51.1614, -12.8051, -6.22143, 121.222, 29.3527, -34.8087, 24.1289, -14.268, -6.52991, 79.8253, 9.084, 68.1991, -38.388, 9.66572, 103.987, 13.4739, -52.3098, 4.38412, 0.417674, -3, 95.447, -8.97946, 68.4628, -25.3275, -4.45058, 122, 34.6645, -58.261, -28.3882, -15, -3, 57.6872, -10, 53.3622, -38.6085, -10, 116.21, trial: 0, score: 4 trial: 1, score: 1 Policy 0: 23.9359, -60.2163, 8.07399, 45, -3, 23.2798, -2.86827, 102.995, -28.1285, -0.691459, 115.542, 21.4191, -75, 26.897, 37.9459, -7.05019, -19.5408, 3.00106, 31.7652, -34.9925, -0.827965, 118.522, 26.7297, -6.13997, 7.53019, -1.93771, -43.6058, 3.866, -10, 47.8792, 6.18318, -10, 122, 26.3923, -32.4677, 7.779, -15, -10.6194, 82.6643, 5.21146, 82.4257, -37.5166, 4.43918, 117.541, 20.4766, -49.9167, -8.8233, -1.88585, -5.57496, 80.5127, 0.202092, 77.8519, -2.59693, -8.82185, 122, 31.2818, -63.9884, -32.9879, -15, -3, 50.5207, -10, 51.558, -16.4773, -6.33019, 122, Average Score: 2.5 trial: 0, score: 2 trial: 1, score: 2 Policy 1: 18.5443, -56.1208, 8.92192, 45, -3, 42.6197, -3.38092, 103.146, -13.6233, 8.22857, 122, 17.0852, -68.0696, 30.2222, 36.883, -9.53939, -36.514, 6.21251, 29.4923, -26.2007, 6.5903, 118.284, 22.0439, -7.16194, 19.9849, 1.43994, -44.6665, 9.56384, -2.35815, 53.9735, 11.4398, -10, 122, 29.4744, -32.088, 10.5453, -15, -15.6599, 101.701, 1.26019, 91.0211, -54.7298, 12.1598, 107.177, 12.5072, -55.0242, -20.5775, 1.24853, -6.64495, 63.0537, 7.04217, 63.5735, -11.3299, -10, 108.655, 29.2187, -62.3997, -30.1724, -15, -3.50656, 66.6635, -10, 48.8729, -31.0454, -1.50783, 114.086, Average Score: 2 trial: 0, score: 6 trial: 1, score: 4 Policy 2: 31.1629, -62.758, 11.6976, 45, -3, 27.0215, -0.160594, 108.312, -38.7988, -8.84213, 114.236, 20.2769, -75, 15.904, 38.7131, -3.86598, -7.24248, 8.69533, 29.8627, -37.089, -5.50596, 109.688, 35.7755, -13.2542, -4.81136, -6.82016, -41.781, 4.80503, -8.3511, 44.6559, -7.55007, -10, 114.146, 26.0183, -28.7036, -5.6126, -15, -14.113, 77.0934, 3.29251, 69.2612, -20.3086, 6.60083, 104.581, 23.8691, -53.4764, -2.90483, -1.26491, -9.80697, 83.6482, -7.94649, 78.5707, -8.40561, -5.95177, 113.129, 27.6828, -63.9273, -27.1224, -15, -3.50093, 68.2881, -10, 51.5097, -1.05541, -10, 122, Average Score: 5 trial: 0, score: 1 trial: 1, score: 4 Policy 3: 15.6028, -65.5516, 8.68113, 45, -3, 45.8601, 5.19518, 110.337, -44.2005, -7.11493, 109.94, 16.6888, -72.7913, 21.4821, 32.6034, -8.84924, -33.2969, 0.325355, 19.9162, -36.2455, 2.00535, 122, 19.9824, -11.1696, 12.2192, -6.67481, -44.6362, -19.6794, -2.53133, 39.9936, 20.3748, -10, 110.594, 36.2795, -31.4866, 1.74875, -9.86302, -15.5649, 68.7697, -2.78228, 74.0985, -45.3665, 12.3836, 106.467, 15.34, -49.4666, 6.1253, 2.22396, -3, 62.077, -9.46703, 76.428, 20.0462, -10, 122, 25.6868, -59.7846, -29.4866, -15, -3, 69.5765, -1.76148, 61.8265, -40.2633, -2.41666, 122, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 1 Policy 4: 21.9485, -57.5411, 12.1689, 43.8979, -5.16688, 37.4416, 2.94399, 115.674, -24.1116, 4.23723, 107.834, 20.8768, -71.5577, 32.9514, 38.7957, -5.41157, -21.6278, 4.45553, 21.629, -10.6121, 8.83223, 115.887, 19.2644, -6.66154, 20.1815, 0.263735, -45.7332, 10.4725, -10, 51.1614, -12.8051, -6.22143, 121.222, 29.3527, -34.8087, 24.1289, -14.268, -6.52991, 79.8253, 9.084, 68.1991, -38.388, 9.66572, 103.987, 13.4739, -52.3098, 4.38412, 0.417674, -3, 95.447, -8.97946, 68.4628, -25.3275, -4.45058, 122, 34.6645, -58.261, -28.3882, -15, -3, 57.6872, -10, 53.3622, -38.6085, -10, 116.21, Average Score: 0.5 --------------------------------- New Iteration Current Best Policy: 31.1629, -62.758, 11.6976, 45, -3, 27.0215, -0.160594, 108.312, -38.7988, -8.84213, 114.236, 20.2769, -75, 15.904, 38.7131, -3.86598, -7.24248, 8.69533, 29.8627, -37.089, -5.50596, 109.688, 35.7755, -13.2542, -4.81136, -6.82016, -41.781, 4.80503, -8.3511, 44.6559, -7.55007, -10, 114.146, 26.0183, -28.7036, -5.6126, -15, -14.113, 77.0934, 3.29251, 69.2612, -20.3086, 6.60083, 104.581, 23.8691, -53.4764, -2.90483, -1.26491, -9.80697, 83.6482, -7.94649, 78.5707, -8.40561, -5.95177, 113.129, 27.6828, -63.9273, -27.1224, -15, -3.50093, 68.2881, -10, 51.5097, -1.05541, -10, 122, Current Best Policy Score: 5 Policy 0 will be: 31.1629, -62.758, 11.6976, 45, -3, 27.0215, -0.160594, 108.312, -38.7988, -8.84213, 114.236, 20.2769, -75, 15.904, 38.7131, -3.86598, -7.24248, 8.69533, 29.8627, -37.089, -5.50596, 109.688, 35.7755, -13.2542, -4.81136, -6.82016, -41.781, 4.80503, -8.3511, 44.6559, -7.55007, -10, 114.146, 26.0183, -28.7036, -5.6126, -15, -14.113, 77.0934, 3.29251, 69.2612, -20.3086, 6.60083, 104.581, 23.8691, -53.4764, -2.90483, -1.26491, -9.80697, 83.6482, -7.94649, 78.5707, -8.40561, -5.95177, 113.129, 27.6828, -63.9273, -27.1224, -15, -3.50093, 68.2881, -10, 51.5097, -1.05541, -10, 122, Policy 1 will be: 21.452, -62.0798, 7.07969, 42.7156, -3, 13.8577, 8.75832, 106.766, -19.152, -3.27199, 116.968, 11.7782, -67.7282, 23.5098, 34.1164, -7.92303, -3.60909, 3.9876, 17.5518, -31.4524, -10, 102.926, 38.0503, -11.9115, 8.05369, -10.9149, -47.1783, -4.42239, -10, 54.1718, -3.03636, -2.93775, 105.346, 32.7248, -32.6939, -19.9578, -15, -12.0196, 63.3914, 3.01576, 80.888, -8.66905, 15.1142, 94.7278, 23.7827, -56.5385, 3.10259, -2.9066, -11.1101, 104.552, 0.380802, 92.1172, -18.4799, -3.4033, 113.538, 20.24, -58.4764, -40.7608, -14.0576, -4.40856, 46.9635, -10, 47.1875, 3.82771, -2.82564, 121.025, Policy 2 will be: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Policy 3 will be: 31.3636, -63.8787, 3.46736, 45, -3, 28.9155, 2.18796, 103.506, -27.2284, -10, 116.056, 24.8947, -75, 28.7505, 36.6337, -4.57454, -2.6298, 9.93626, 28.1454, -57.8334, -4.88091, 95.5302, 33.5789, -16.4527, -20.7232, -6.25263, -44.5112, -6.39989, 0.25022, 30.8337, -18.3998, -10, 106.627, 16.6511, -32.1899, 11.0755, -15, -10.2168, 68.7823, 9.12329, 78.6296, -18.9625, 7.58989, 95.1188, 21.306, -51.0861, 5.22857, -5.42634, -6.6697, 106.503, -10, 87.8798, 15.1436, -3.11064, 110.773, 31.3796, -68.4104, -40.0337, -13.3557, -3, 70.2945, -10, 43.197, 13.9701, -10, 113.1, Policy 4 will be: 37.9944, -67.0714, 18.4091, 45, -3, 40.2196, 6.22179, 94.1353, -50.9675, -10, 122, 29.9911, -71.6562, 18.6423, 34.9278, -3, 6.37835, 16.619, 43.0825, -56.6648, -1.12257, 102.286, 35.199, -9.7481, 11.6687, -7.73477, -47.1467, 13.437, -10, 56.1664, 7.60512, -10, 122, 26.0059, -26.5882, 9.03683, -15, -8.81128, 89.8121, 5.66595, 74.5869, -24.3725, 10.6376, 89.9279, 26.0881, -46.6397, -6.60628, -4.469, -12.4299, 97.3487, -9.37481, 90.8845, -12.3069, -10, 104.159, 37.3871, -62.6053, -39.9748, -15, -3, 70.4058, -10, 47.3798, 6.46386, -1.99443, 111.211, trial: 0, score: 0 trial: 1, score: 0 Policy 0: 31.1629, -62.758, 11.6976, 45, -3, 27.0215, -0.160594, 108.312, -38.7988, -8.84213, 114.236, 20.2769, -75, 15.904, 38.7131, -3.86598, -7.24248, 8.69533, 29.8627, -37.089, -5.50596, 109.688, 35.7755, -13.2542, -4.81136, -6.82016, -41.781, 4.80503, -8.3511, 44.6559, -7.55007, -10, 114.146, 26.0183, -28.7036, -5.6126, -15, -14.113, 77.0934, 3.29251, 69.2612, -20.3086, 6.60083, 104.581, 23.8691, -53.4764, -2.90483, -1.26491, -9.80697, 83.6482, -7.94649, 78.5707, -8.40561, -5.95177, 113.129, 27.6828, -63.9273, -27.1224, -15, -3.50093, 68.2881, -10, 51.5097, -1.05541, -10, 122, Average Score: 0 trial: 0, score: 1 trial: 1, score: 2 Policy 1: 21.452, -62.0798, 7.07969, 42.7156, -3, 13.8577, 8.75832, 106.766, -19.152, -3.27199, 116.968, 11.7782, -67.7282, 23.5098, 34.1164, -7.92303, -3.60909, 3.9876, 17.5518, -31.4524, -10, 102.926, 38.0503, -11.9115, 8.05369, -10.9149, -47.1783, -4.42239, -10, 54.1718, -3.03636, -2.93775, 105.346, 32.7248, -32.6939, -19.9578, -15, -12.0196, 63.3914, 3.01576, 80.888, -8.66905, 15.1142, 94.7278, 23.7827, -56.5385, 3.10259, -2.9066, -11.1101, 104.552, 0.380802, 92.1172, -18.4799, -3.4033, 113.538, 20.24, -58.4764, -40.7608, -14.0576, -4.40856, 46.9635, -10, 47.1875, 3.82771, -2.82564, 121.025, Average Score: 1.5 trial: 0, score: 5 trial: 1, score: 6 Policy 2: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 31.3636, -63.8787, 3.46736, 45, -3, 28.9155, 2.18796, 103.506, -27.2284, -10, 116.056, 24.8947, -75, 28.7505, 36.6337, -4.57454, -2.6298, 9.93626, 28.1454, -57.8334, -4.88091, 95.5302, 33.5789, -16.4527, -20.7232, -6.25263, -44.5112, -6.39989, 0.25022, 30.8337, -18.3998, -10, 106.627, 16.6511, -32.1899, 11.0755, -15, -10.2168, 68.7823, 9.12329, 78.6296, -18.9625, 7.58989, 95.1188, 21.306, -51.0861, 5.22857, -5.42634, -6.6697, 106.503, -10, 87.8798, 15.1436, -3.11064, 110.773, 31.3796, -68.4104, -40.0337, -13.3557, -3, 70.2945, -10, 43.197, 13.9701, -10, 113.1, Average Score: 0 trial: 0, score: 5 trial: 1, score: 3 Policy 4: 37.9944, -67.0714, 18.4091, 45, -3, 40.2196, 6.22179, 94.1353, -50.9675, -10, 122, 29.9911, -71.6562, 18.6423, 34.9278, -3, 6.37835, 16.619, 43.0825, -56.6648, -1.12257, 102.286, 35.199, -9.7481, 11.6687, -7.73477, -47.1467, 13.437, -10, 56.1664, 7.60512, -10, 122, 26.0059, -26.5882, 9.03683, -15, -8.81128, 89.8121, 5.66595, 74.5869, -24.3725, 10.6376, 89.9279, 26.0881, -46.6397, -6.60628, -4.469, -12.4299, 97.3487, -9.37481, 90.8845, -12.3069, -10, 104.159, 37.3871, -62.6053, -39.9748, -15, -3, 70.4058, -10, 47.3798, 6.46386, -1.99443, 111.211, Average Score: 4 --------------------------------- New Iteration Current Best Policy: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Current Best Policy Score: 5.5 Policy 0 will be: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Policy 1 will be: 26.5142, -60.6249, 21.9451, 45, -6.86381, 39.0169, 3.36364, 109.374, -52.7869, 2.29451, 118.627, 14.5809, -75, 28.3931, 33.0633, -6.70136, 32.7747, 1.68911, 54.4919, -26.7414, -10, 114.027, 48.4924, -14.1056, 3.06583, -1.78823, -36.7077, -1.50343, -10, 61.2624, -49.7741, -10, 94.7378, 20.5789, -36.1144, 21.9639, -12.6412, -12.4196, 40.3998, -6.74175, 79.4412, 1.36145, 16.2272, 84.618, 34.066, -53.6692, -4.6244, -4.31162, -3, 86.9618, 4.97056, 79.2127, 22.3661, 5.89418, 118.406, 40.311, -67.9363, -27.2721, -11.8684, -5.75672, 38.4297, -6.57885, 46.7641, 15.5647, 1.1299, 120.926, Policy 2 will be: 25.0347, -65.3847, 20.498, 40.673, -3, 45.6387, 4.1773, 122, -51.9196, 3.74239, 106.937, 13.4631, -74.1803, 6.01936, 31.9016, -8.08297, -8.385, -4.38868, 32.4405, -58.5748, -10, 90.4101, 53.4691, -21.1133, -15.2649, -1.41303, -33.5098, 1.74527, -7.63879, 49.5083, -49.4795, -6.18359, 96.9511, 25.372, -26.4798, 23.8903, -15, -9.59644, 41.8864, -2.74202, 76.4954, -12.6082, 18.7302, 86.0128, 24.5266, -43.7329, -12.4366, -7.85104, -4.17808, 103.322, -7.79169, 75.636, -2.61948, -2.25411, 100.634, 37.6911, -64.0585, -23.8494, -14.4978, -8.00792, 51.5934, -3.35479, 33.2358, 5.14366, 7.27915, 122, Policy 3 will be: 36.1015, -61.5577, -5.66278, 43.1665, -3, 26.8696, -0.751104, 122, -80.5316, -10, 114.02, 16.5995, -75, 10.3705, 34.4652, -3, -3.38231, 9.86127, 36.9446, -56.5825, -2.01077, 106.645, 47.0842, -27.2848, -3.91254, 0.922449, -40.2178, -13.6899, -10, 67.1624, -28.7014, -10, 118.004, 28.0811, -28.9942, -6.60158, -15, -13.0605, 75.152, -0.430476, 73.527, 0.730939, 4.9269, 85.3713, 29.9146, -56.9522, -16.354, -9.51135, -9.80763, 114.516, -9.20212, 102.077, 9.00783, -7.10397, 101.767, 43.148, -62.15, -19.4201, -14.3578, -12.7402, 28.2568, -4.74503, 53.0355, 32.9098, 4.5658, 106.048, Policy 4 will be: 35.4096, -66.2318, -2.62082, 45, -7.53573, 23.9667, 4.86565, 112.216, -82.2922, -8.80953, 102.327, 15.9779, -75, 23.539, 32.6926, -3, 5.8793, 6.50992, 45.4249, -47.2133, -10, 91.7401, 53.1828, -23.5593, -16.3301, 0.473208, -31.809, -8.49697, -10, 44.741, -31.3338, -6.29686, 118.911, 30.7099, -35.9385, -9.28774, -15, -10.42, 44.5822, -2.41036, 64.6791, -20.3535, 22.3981, 85.0079, 30.7321, -50.7406, -19.91, -4.65602, -12.1404, 99.6162, 7.89473, 90.6154, -17.2543, 4.59708, 122, 40.005, -63.4571, -12.7555, -10.7499, -4.99689, 43.7281, 1.11777, 36.4196, 29.876, 7.99088, 110.235, trial: 0, score: 6 trial: 1, score: 5 Policy 0: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 26.5142, -60.6249, 21.9451, 45, -6.86381, 39.0169, 3.36364, 109.374, -52.7869, 2.29451, 118.627, 14.5809, -75, 28.3931, 33.0633, -6.70136, 32.7747, 1.68911, 54.4919, -26.7414, -10, 114.027, 48.4924, -14.1056, 3.06583, -1.78823, -36.7077, -1.50343, -10, 61.2624, -49.7741, -10, 94.7378, 20.5789, -36.1144, 21.9639, -12.6412, -12.4196, 40.3998, -6.74175, 79.4412, 1.36145, 16.2272, 84.618, 34.066, -53.6692, -4.6244, -4.31162, -3, 86.9618, 4.97056, 79.2127, 22.3661, 5.89418, 118.406, 40.311, -67.9363, -27.2721, -11.8684, -5.75672, 38.4297, -6.57885, 46.7641, 15.5647, 1.1299, 120.926, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 25.0347, -65.3847, 20.498, 40.673, -3, 45.6387, 4.1773, 122, -51.9196, 3.74239, 106.937, 13.4631, -74.1803, 6.01936, 31.9016, -8.08297, -8.385, -4.38868, 32.4405, -58.5748, -10, 90.4101, 53.4691, -21.1133, -15.2649, -1.41303, -33.5098, 1.74527, -7.63879, 49.5083, -49.4795, -6.18359, 96.9511, 25.372, -26.4798, 23.8903, -15, -9.59644, 41.8864, -2.74202, 76.4954, -12.6082, 18.7302, 86.0128, 24.5266, -43.7329, -12.4366, -7.85104, -4.17808, 103.322, -7.79169, 75.636, -2.61948, -2.25411, 100.634, 37.6911, -64.0585, -23.8494, -14.4978, -8.00792, 51.5934, -3.35479, 33.2358, 5.14366, 7.27915, 122, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 36.1015, -61.5577, -5.66278, 43.1665, -3, 26.8696, -0.751104, 122, -80.5316, -10, 114.02, 16.5995, -75, 10.3705, 34.4652, -3, -3.38231, 9.86127, 36.9446, -56.5825, -2.01077, 106.645, 47.0842, -27.2848, -3.91254, 0.922449, -40.2178, -13.6899, -10, 67.1624, -28.7014, -10, 118.004, 28.0811, -28.9942, -6.60158, -15, -13.0605, 75.152, -0.430476, 73.527, 0.730939, 4.9269, 85.3713, 29.9146, -56.9522, -16.354, -9.51135, -9.80763, 114.516, -9.20212, 102.077, 9.00783, -7.10397, 101.767, 43.148, -62.15, -19.4201, -14.3578, -12.7402, 28.2568, -4.74503, 53.0355, 32.9098, 4.5658, 106.048, Average Score: 0 trial: 0, score: 5 trial: 1, score: 0 Policy 4: 35.4096, -66.2318, -2.62082, 45, -7.53573, 23.9667, 4.86565, 112.216, -82.2922, -8.80953, 102.327, 15.9779, -75, 23.539, 32.6926, -3, 5.8793, 6.50992, 45.4249, -47.2133, -10, 91.7401, 53.1828, -23.5593, -16.3301, 0.473208, -31.809, -8.49697, -10, 44.741, -31.3338, -6.29686, 118.911, 30.7099, -35.9385, -9.28774, -15, -10.42, 44.5822, -2.41036, 64.6791, -20.3535, 22.3981, 85.0079, 30.7321, -50.7406, -19.91, -4.65602, -12.1404, 99.6162, 7.89473, 90.6154, -17.2543, 4.59708, 122, 40.005, -63.4571, -12.7555, -10.7499, -4.99689, 43.7281, 1.11777, 36.4196, 29.876, 7.99088, 110.235, Average Score: 2.5 --------------------------------- New Iteration Current Best Policy: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Current Best Policy Score: 5.5 Policy 0 will be: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Policy 1 will be: 31.7648, -58.8831, 7.76532, 41.134, -3, 44.154, 6.41054, 106.673, -65.5599, 2.63083, 112.424, 10.7358, -75, 9.2278, 36.6512, -3, 29.0573, -2.92437, 54.1116, -60.5866, -5.12501, 104.694, 38.1418, -26.7431, -11.7703, 2.4792, -34.6889, 0.244136, -10, 56.8002, -12.9127, -6.9564, 102.16, 25.2402, -30.6595, 16.5958, -13.2528, -11.5034, 44.915, 5.4119, 79.9622, -30.1858, 7.43418, 93.5894, 34.2637, -51.1564, -30.1509, -8.21014, -3.90405, 115.595, 2.50136, 87.9082, 8.71411, 3.09624, 107.516, 39.3153, -62.8043, -6.43126, -11.6358, -4.02882, 76.5437, 3.03284, 31.0949, 36.4964, -2.92092, 117.6, Policy 2 will be: 38.9319, -53.5693, 10.8203, 44.358, -3, 11.4539, -0.858262, 115.073, -40.3567, -10, 122, 7.65497, -74.6246, 0.801762, 32.1801, -3.67796, -8.16331, 11.4673, 44.6747, -39.0074, -10, 98.1048, 48.3074, -13.6841, 10.4712, -1.51469, -32.7228, 25.3738, -10, 54.6171, -46.4671, -10, 117.392, 34.7534, -37.6327, 11.2146, -9.3449, -10.125, 69.8468, 2.58286, 52.6202, 5.4889, 16.5547, 98.9344, 28.9347, -44.8683, -16.2518, -6.49322, -9.44488, 92.4519, 0.599824, 84.2004, -21.6231, 9.87902, 119.079, 26.6605, -62.2219, -40.1768, -10.0886, -5.78523, 70.0691, -3.63129, 38.6409, 43.591, -2.65319, 113.459, Policy 3 will be: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Policy 4 will be: 37.264, -52.6481, 7.13301, 43.8018, -5.57643, 13.3213, 6.5586, 120.21, -72.3883, 4.04999, 102.013, 14.7674, -65.4664, 19.7262, 33.9829, -5.96366, -6.37815, 9.82139, 45.7894, -14.2389, -3.81709, 98.8337, 36.4466, -19.4769, 10.4523, 2.75858, -39.4825, 24.5524, -10, 64.6167, -47.142, -3.65579, 93.061, 24.408, -30.5633, 21.3789, -15, -4.3084, 80.3972, -10, 80.138, -13.6921, 11.8918, 77.6063, 24.878, -52.3301, -4.54468, 1.25625, -3, 84.2466, -3.08712, 88.5978, -19.7772, 3.34063, 112.432, 30.723, -67.3713, -29.74, -15, -3.03732, 62.9322, 4.72656, 29.5419, 7.47116, -9.72102, 122, trial: 0, score: 6 trial: 1, score: 0 Policy 0: 30.4873, -59.4232, 8.75169, 45, -3, 29.5998, -1.63374, 121.091, -59.645, -4.29996, 113.691, 12.5742, -72.5619, 13.9019, 36.4302, -3, 10.022, 1.85734, 41.6967, -38.0336, -10, 104.507, 45.2421, -20.4721, -1.06016, -1.93667, -37.1127, 10.8085, -9.83924, 57.1344, -27.0551, -10, 106.259, 29.9824, -33.4001, 7.79982, -15, -9.16749, 63.5543, -1.82601, 67.1216, -5.75058, 14.3131, 91.9707, 32.7238, -50.1414, -19.9376, -3.90081, -7.29939, 106.531, -0.356547, 87.8944, -1.89438, 1.01662, 113.671, 33.4526, -64.3483, -23.0581, -13.87, -7.25816, 52.5801, -4.77603, 39.4554, 19.2335, -1.11596, 119.425, Average Score: 3 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 31.7648, -58.8831, 7.76532, 41.134, -3, 44.154, 6.41054, 106.673, -65.5599, 2.63083, 112.424, 10.7358, -75, 9.2278, 36.6512, -3, 29.0573, -2.92437, 54.1116, -60.5866, -5.12501, 104.694, 38.1418, -26.7431, -11.7703, 2.4792, -34.6889, 0.244136, -10, 56.8002, -12.9127, -6.9564, 102.16, 25.2402, -30.6595, 16.5958, -13.2528, -11.5034, 44.915, 5.4119, 79.9622, -30.1858, 7.43418, 93.5894, 34.2637, -51.1564, -30.1509, -8.21014, -3.90405, 115.595, 2.50136, 87.9082, 8.71411, 3.09624, 107.516, 39.3153, -62.8043, -6.43126, -11.6358, -4.02882, 76.5437, 3.03284, 31.0949, 36.4964, -2.92092, 117.6, Average Score: 0 trial: 0, score: 3 trial: 1, score: 0 Policy 2: 38.9319, -53.5693, 10.8203, 44.358, -3, 11.4539, -0.858262, 115.073, -40.3567, -10, 122, 7.65497, -74.6246, 0.801762, 32.1801, -3.67796, -8.16331, 11.4673, 44.6747, -39.0074, -10, 98.1048, 48.3074, -13.6841, 10.4712, -1.51469, -32.7228, 25.3738, -10, 54.6171, -46.4671, -10, 117.392, 34.7534, -37.6327, 11.2146, -9.3449, -10.125, 69.8468, 2.58286, 52.6202, 5.4889, 16.5547, 98.9344, 28.9347, -44.8683, -16.2518, -6.49322, -9.44488, 92.4519, 0.599824, 84.2004, -21.6231, 9.87902, 119.079, 26.6605, -62.2219, -40.1768, -10.0886, -5.78523, 70.0691, -3.63129, 38.6409, 43.591, -2.65319, 113.459, Average Score: 1.5 trial: 0, score: 4 trial: 1, score: 4 Policy 3: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 37.264, -52.6481, 7.13301, 43.8018, -5.57643, 13.3213, 6.5586, 120.21, -72.3883, 4.04999, 102.013, 14.7674, -65.4664, 19.7262, 33.9829, -5.96366, -6.37815, 9.82139, 45.7894, -14.2389, -3.81709, 98.8337, 36.4466, -19.4769, 10.4523, 2.75858, -39.4825, 24.5524, -10, 64.6167, -47.142, -3.65579, 93.061, 24.408, -30.5633, 21.3789, -15, -4.3084, 80.3972, -10, 80.138, -13.6921, 11.8918, 77.6063, 24.878, -52.3301, -4.54468, 1.25625, -3, 84.2466, -3.08712, 88.5978, -19.7772, 3.34063, 112.432, 30.723, -67.3713, -29.74, -15, -3.03732, 62.9322, 4.72656, 29.5419, 7.47116, -9.72102, 122, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Current Best Policy Score: 4 Policy 0 will be: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Policy 1 will be: 43.4535, -55.436, -0.113343, 44.0667, -3.34636, 36.3247, -7.60826, 104.058, -67.1866, -10, 121.756, 10.1931, -73.5903, 8.32672, 32.3398, -3, 1.33381, -0.585222, 54.3695, -31.9197, -9.07904, 98.7311, 32.8971, -24.9435, 13.2587, 0.193227, -33.6979, -5.01263, 1.63018, 69.7169, -9.01737, -5.12214, 110.016, 35.6205, -37.7065, 3.25976, -15, -11.1463, 62.4473, -2.91394, 47.6654, -2.36629, 11.4616, 94.2266, 46.9799, -38.0354, -15.9441, -6.50056, -6.68356, 98.0969, 6.87709, 87.9401, -47.1502, -0.762962, 96.6727, 26.8313, -59.0991, -27.0884, -11.3207, -6.14098, 40.4864, -10, 43.4485, 19.4738, 10.1569, 100.567, Policy 2 will be: 30.6891, -56.2791, -3.56093, 42.0516, -3, -5.58446, 7.29095, 113.458, -93.7645, -10, 109.693, 6.3117, -75, 22.1009, 36.9819, -5.51234, -29.6239, 8.78692, 54.6188, -26.4661, -7.8922, 122, 50.1609, -14.3857, -8.46977, -0.877988, -40.444, -9.5449, -3.58681, 69.8728, -31.036, -10, 109.939, 22.2914, -32.2028, 1.71059, -15, -8.62731, 45.6253, -1.27396, 61.6, 22.035, 6.9322, 86.8119, 43.1481, -36.8512, -29.7736, -7.52526, -7.96707, 130, 1.80782, 79.3313, -29.7633, 11.1254, 114.716, 30.2969, -70.3624, -13.1528, -14.4001, -7.26879, 25.1872, -10, 58.4184, 6.54099, 8.61462, 110.404, Policy 3 will be: 30.8937, -68.6064, -13.6334, 44.4449, -6.17813, -6.42744, -1.90001, 107.915, -72.9318, -6.41164, 104.48, 21.0613, -69.0074, 23.9163, 33.6746, -4.54316, -15.4993, -4.1912, 37.111, -6.70814, -0.28397, 114.717, 40.4178, -22.046, 9.26186, -2.43322, -39.5792, -3.2459, -8.47301, 62.4268, -21.8127, -5.26137, 91.9325, 23.3533, -38.7877, 10.9614, -9.9275, -7.33631, 54.8218, -10, 61.6213, 23.8154, 9.90793, 72.052, 35.0988, -51.0618, -18.2149, -3.02582, -8.47449, 130, 9.82692, 85.675, -43.2927, 0.674917, 91.1437, 28.8338, -58.2531, -3.15704, -8.94069, -4.08053, 40.8065, -10, 49.4175, 26.9295, 17.0686, 116.718, Policy 4 will be: 40.6359, -62.1158, 5.83405, 45, -8.26778, -4.12346, 4.57911, 122, -79.3547, -1.00643, 110.961, 5.67265, -75, 28.1598, 30.8004, -8.79076, -21.2577, 7.82512, 37.8495, -26.3329, -3.1021, 105.587, 41.8672, -26.224, -2.3795, -8.47994, -30.6311, 17.7641, 4.93408, 61.7689, -22.3017, -10, 93.0485, 31.9151, -34.788, -6.38361, -9.48176, -13.9669, 57.8568, -0.138884, 61.1605, 1.8269, 7.45067, 74.8579, 35.7035, -37.2379, -5.63743, -6.98582, -10.326, 124.745, 3.02774, 89.7285, -21.914, -0.340005, 111.285, 39.5297, -66.4399, -25.8577, -9.0158, -6.41131, 45.8052, -4.41007, 44.1774, 38.2234, 15.4934, 100.018, trial: 0, score: 4 trial: 1, score: 4 Policy 0: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Average Score: 4 trial: 0, score: 2 trial: 1, score: 1 Policy 1: 43.4535, -55.436, -0.113343, 44.0667, -3.34636, 36.3247, -7.60826, 104.058, -67.1866, -10, 121.756, 10.1931, -73.5903, 8.32672, 32.3398, -3, 1.33381, -0.585222, 54.3695, -31.9197, -9.07904, 98.7311, 32.8971, -24.9435, 13.2587, 0.193227, -33.6979, -5.01263, 1.63018, 69.7169, -9.01737, -5.12214, 110.016, 35.6205, -37.7065, 3.25976, -15, -11.1463, 62.4473, -2.91394, 47.6654, -2.36629, 11.4616, 94.2266, 46.9799, -38.0354, -15.9441, -6.50056, -6.68356, 98.0969, 6.87709, 87.9401, -47.1502, -0.762962, 96.6727, 26.8313, -59.0991, -27.0884, -11.3207, -6.14098, 40.4864, -10, 43.4485, 19.4738, 10.1569, 100.567, Average Score: 1.5 trial: 0, score: 4 trial: 1, score: 3 Policy 2: 30.6891, -56.2791, -3.56093, 42.0516, -3, -5.58446, 7.29095, 113.458, -93.7645, -10, 109.693, 6.3117, -75, 22.1009, 36.9819, -5.51234, -29.6239, 8.78692, 54.6188, -26.4661, -7.8922, 122, 50.1609, -14.3857, -8.46977, -0.877988, -40.444, -9.5449, -3.58681, 69.8728, -31.036, -10, 109.939, 22.2914, -32.2028, 1.71059, -15, -8.62731, 45.6253, -1.27396, 61.6, 22.035, 6.9322, 86.8119, 43.1481, -36.8512, -29.7736, -7.52526, -7.96707, 130, 1.80782, 79.3313, -29.7633, 11.1254, 114.716, 30.2969, -70.3624, -13.1528, -14.4001, -7.26879, 25.1872, -10, 58.4184, 6.54099, 8.61462, 110.404, Average Score: 3.5 trial: 0, score: 2 trial: 1, score: 4 Policy 3: 30.8937, -68.6064, -13.6334, 44.4449, -6.17813, -6.42744, -1.90001, 107.915, -72.9318, -6.41164, 104.48, 21.0613, -69.0074, 23.9163, 33.6746, -4.54316, -15.4993, -4.1912, 37.111, -6.70814, -0.28397, 114.717, 40.4178, -22.046, 9.26186, -2.43322, -39.5792, -3.2459, -8.47301, 62.4268, -21.8127, -5.26137, 91.9325, 23.3533, -38.7877, 10.9614, -9.9275, -7.33631, 54.8218, -10, 61.6213, 23.8154, 9.90793, 72.052, 35.0988, -51.0618, -18.2149, -3.02582, -8.47449, 130, 9.82692, 85.675, -43.2927, 0.674917, 91.1437, 28.8338, -58.2531, -3.15704, -8.94069, -4.08053, 40.8065, -10, 49.4175, 26.9295, 17.0686, 116.718, Average Score: 3 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 40.6359, -62.1158, 5.83405, 45, -8.26778, -4.12346, 4.57911, 122, -79.3547, -1.00643, 110.961, 5.67265, -75, 28.1598, 30.8004, -8.79076, -21.2577, 7.82512, 37.8495, -26.3329, -3.1021, 105.587, 41.8672, -26.224, -2.3795, -8.47994, -30.6311, 17.7641, 4.93408, 61.7689, -22.3017, -10, 93.0485, 31.9151, -34.788, -6.38361, -9.48176, -13.9669, 57.8568, -0.138884, 61.1605, 1.8269, 7.45067, 74.8579, 35.7035, -37.2379, -5.63743, -6.98582, -10.326, 124.745, 3.02774, 89.7285, -21.914, -0.340005, 111.285, 39.5297, -66.4399, -25.8577, -9.0158, -6.41131, 45.8052, -4.41007, 44.1774, 38.2234, 15.4934, 100.018, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Current Best Policy Score: 4 Policy 0 will be: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Policy 1 will be: 37.1374, -60.3476, 0.504336, 45, -3, 32.6135, -4.55903, 105.727, -87.2728, -10, 122, 3.43947, -73.3122, 1.42057, 40.7951, -8.44219, -6.22595, 2.3976, 50.4746, -14.9675, -10, 121.075, 46.0092, -19.1888, -18.7575, 0.162963, -30.7094, -16.031, -10, 52.1587, -22.6696, -10, 108.505, 26.9455, -32.786, -11.1676, -9.38138, -11.9635, 76.6909, -6.04296, 70.9626, 38.5773, 12.3607, 89.9032, 34.4035, -50.6264, -5.84773, -3.43425, -7.03052, 120.517, -1.31127, 82.2257, -20.2118, 9.98579, 97.1191, 32.7061, -56.8551, -31.2896, -15, -6.96685, 31.6254, -10, 48.5649, 42.5161, 13.4805, 118.281, Policy 2 will be: 32.3487, -59.6885, -18.5364, 43.5079, -3, 20.1253, -4.44188, 111.93, -54.2919, -5.55069, 116.999, 10.5287, -65.7041, 9.01837, 32.3551, -6.02407, 18.7041, -0.962338, 37.8323, -46.4322, -10, 117.551, 40.1342, -23.8667, 3.78335, -3.97283, -38.3855, 8.23734, -1.36807, 50.3729, 5.61731, -0.711633, 108.008, 27.1977, -26.6049, 9.90054, -15, -9.64746, 78.2282, 0.56024, 57.1224, 9.18123, 18.0384, 90.1836, 41.6367, -38.642, -22.6871, -1.07035, -9.34265, 117.283, 11.1022, 65.105, -0.511246, 8.156, 109.282, 26.1322, -56.2695, -34.6665, -13.5174, -11.1687, 18.8712, -10, 51.6384, 37.53, 13.509, 116.744, Policy 3 will be: 40.9958, -65.9121, -11.8369, 44.435, -3.1312, 26.9523, -9.36737, 110.488, -82.4362, -6.26169, 105.405, 21.6789, -75, 13.7383, 30.5074, -4.56762, 12.8323, 9.03233, 63.5161, -42.1219, -6.63992, 111.741, 33.6683, -23.9662, -6.49704, -4.74604, -37.4099, 23.811, -2.50596, 68.8899, -21.7056, -8.56945, 96.6614, 37.5521, -26.5865, -3.42197, -14.284, -9.54634, 73.9379, -10, 68.1428, 37.4359, 13.0995, 84.883, 47.8073, -46.0435, -26.2651, 3.09729, -10.7217, 102.59, 15.0776, 64.2089, -37.077, 6.02996, 108.639, 32.2876, -59.6465, -3.71435, -12.3907, -4.85417, 16.2407, -10, 58.4357, -1.12271, 7.03519, 107.09, Policy 4 will be: 34.1713, -67.0513, 0.518013, 45, -8.53598, -4.60153, -5.28769, 120.547, -84.3882, -6.21901, 122, 4.21667, -75, -3.98729, 39.2923, -3, 3.23091, 7.69312, 44.9141, -10.6775, -6.9384, 106.942, 37.7549, -19.665, -14.9696, -1.29242, -36.1041, -16.885, 5.04731, 62.0138, -4.46266, -4.06327, 107.184, 38.8562, -29.1781, 8.95756, -15, -11.9949, 57.1766, -3.27538, 63.2854, 13.3699, 9.11737, 89.7455, 41.4742, -44.9829, -19.8973, -1.03608, -10.7968, 100.327, 1.94059, 68.9468, -39.2713, 14.555, 110.547, 33.3672, -61.0851, -21.0647, -15, -6.53643, 38.0252, -3.94753, 46.5808, 24.6036, 4.78957, 95.934, trial: 0, score: 5 trial: 1, score: 3 Policy 0: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Average Score: 4 trial: 0, score: 0 trial: 1, score: 5 Policy 1: 37.1374, -60.3476, 0.504336, 45, -3, 32.6135, -4.55903, 105.727, -87.2728, -10, 122, 3.43947, -73.3122, 1.42057, 40.7951, -8.44219, -6.22595, 2.3976, 50.4746, -14.9675, -10, 121.075, 46.0092, -19.1888, -18.7575, 0.162963, -30.7094, -16.031, -10, 52.1587, -22.6696, -10, 108.505, 26.9455, -32.786, -11.1676, -9.38138, -11.9635, 76.6909, -6.04296, 70.9626, 38.5773, 12.3607, 89.9032, 34.4035, -50.6264, -5.84773, -3.43425, -7.03052, 120.517, -1.31127, 82.2257, -20.2118, 9.98579, 97.1191, 32.7061, -56.8551, -31.2896, -15, -6.96685, 31.6254, -10, 48.5649, 42.5161, 13.4805, 118.281, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 32.3487, -59.6885, -18.5364, 43.5079, -3, 20.1253, -4.44188, 111.93, -54.2919, -5.55069, 116.999, 10.5287, -65.7041, 9.01837, 32.3551, -6.02407, 18.7041, -0.962338, 37.8323, -46.4322, -10, 117.551, 40.1342, -23.8667, 3.78335, -3.97283, -38.3855, 8.23734, -1.36807, 50.3729, 5.61731, -0.711633, 108.008, 27.1977, -26.6049, 9.90054, -15, -9.64746, 78.2282, 0.56024, 57.1224, 9.18123, 18.0384, 90.1836, 41.6367, -38.642, -22.6871, -1.07035, -9.34265, 117.283, 11.1022, 65.105, -0.511246, 8.156, 109.282, 26.1322, -56.2695, -34.6665, -13.5174, -11.1687, 18.8712, -10, 51.6384, 37.53, 13.509, 116.744, Average Score: 0 trial: 0, score: 0 trial: 1, score: 2 Policy 3: 40.9958, -65.9121, -11.8369, 44.435, -3.1312, 26.9523, -9.36737, 110.488, -82.4362, -6.26169, 105.405, 21.6789, -75, 13.7383, 30.5074, -4.56762, 12.8323, 9.03233, 63.5161, -42.1219, -6.63992, 111.741, 33.6683, -23.9662, -6.49704, -4.74604, -37.4099, 23.811, -2.50596, 68.8899, -21.7056, -8.56945, 96.6614, 37.5521, -26.5865, -3.42197, -14.284, -9.54634, 73.9379, -10, 68.1428, 37.4359, 13.0995, 84.883, 47.8073, -46.0435, -26.2651, 3.09729, -10.7217, 102.59, 15.0776, 64.2089, -37.077, 6.02996, 108.639, 32.2876, -59.6465, -3.71435, -12.3907, -4.85417, 16.2407, -10, 58.4357, -1.12271, 7.03519, 107.09, Average Score: 1 trial: 0, score: 5 trial: 1, score: 3 Policy 4: 34.1713, -67.0513, 0.518013, 45, -8.53598, -4.60153, -5.28769, 120.547, -84.3882, -6.21901, 122, 4.21667, -75, -3.98729, 39.2923, -3, 3.23091, 7.69312, 44.9141, -10.6775, -6.9384, 106.942, 37.7549, -19.665, -14.9696, -1.29242, -36.1041, -16.885, 5.04731, 62.0138, -4.46266, -4.06327, 107.184, 38.8562, -29.1781, 8.95756, -15, -11.9949, 57.1766, -3.27538, 63.2854, 13.3699, 9.11737, 89.7455, 41.4742, -44.9829, -19.8973, -1.03608, -10.7968, 100.327, 1.94059, 68.9468, -39.2713, 14.555, 110.547, 33.3672, -61.0851, -21.0647, -15, -6.53643, 38.0252, -3.94753, 46.5808, 24.6036, 4.78957, 95.934, Average Score: 4 --------------------------------- New Iteration Current Best Policy: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Current Best Policy Score: 4 Policy 0 will be: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Policy 1 will be: 43.5285, -63.7269, -14.281, 43.9847, -3, 13.774, 3.04455, 108.474, -57.5089, -10, 122, 21.6125, -75, 10.4467, 31.0744, -3, 3.08575, 9.55721, 56.2415, -52.1769, -1.36035, 121.962, 37.9829, -15.2524, 13.1994, -3.33598, -37.1948, -9.38603, 5.93746, 49.5208, -24.9082, -6.53785, 100.854, 29.7714, -28.58, 12.2644, -11.4232, -12.6064, 81.061, -10, 46.4089, 13.5155, 20.2418, 99.3323, 43.8119, -48.9464, -4.57291, -5.61927, -5.31072, 109.071, 1.28891, 71.9279, -24.6777, 11.8762, 92.2117, 28.0751, -63.0635, -12.7639, -15, -5.35137, 52.5723, -1.90027, 42.0541, 34.4517, 0.673128, 94.6953, Policy 2 will be: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Policy 3 will be: 38.337, -69.969, 0.147581, 45, -3.40251, 38.6341, 0.586505, 112.038, -83.097, -6.02118, 115.986, 11.2133, -67.1303, 13.6195, 40.3727, -5.29989, -20.3307, 8.35494, 36.8904, -42.7914, -10, 116.747, 37.1825, -13.7762, 7.66095, -3.27783, -38.0427, 22.7824, -10, 58.553, -29.2135, -10, 103.576, 31.8551, -37.2105, -13.9723, -9.95176, -17.6958, 67.5019, -0.241433, 62.3065, -1.63391, 9.27134, 85.2203, 45.911, -41.8981, -33.7889, -2.54545, -9.57215, 118.276, 13.8456, 70.7956, -17.2791, 6.97154, 105.393, 33.3077, -65.2078, -8.86408, -15, -7.7189, 43.8492, -2.30945, 34.3151, 1.26409, 9.50235, 97.0163, Policy 4 will be: 37.7393, -59.3093, 6.65568, 45, -3.69337, 26.9854, 7.00372, 122, -77.6711, -5.98868, 111.495, 4.80675, -75, 17.1992, 38.4824, -3, -28.8718, 2.3716, 50.0394, -44.6241, -10, 122, 35.5901, -26.4467, 14.7024, 2.40958, -38.9277, 0.469342, -10, 57.3545, -2.62681, -10, 106.108, 28.98, -31.1419, 7.61783, -15, -11.5297, 87.7299, -10, 72.735, 23.699, 19.6639, 74.3569, 39.7602, -49.7453, -19.9083, -1.24463, -5.44973, 130, 15.1302, 72.0466, -19.6715, 15.0626, 93.653, 27.739, -58.2275, -19.5177, -12.1808, -12.465, 42.706, -10, 34.0789, 28.0061, 13.8666, 120.244, trial: 0, score: 3 trial: 1, score: 2 Policy 0: 33.8917, -62.7253, -1.16697, 45, -3.24513, 17.7347, -1.86649, 116.793, -75.0225, -9.78647, 115.137, 12.0646, -72.8499, 11.9062, 36.475, -3, -5.23963, 0.186035, 51.4617, -29.2516, -10, 112.407, 41.774, -19.8615, -2.79593, -3.07818, -34.8153, 2.2539, -2.97025, 63.7407, -15.2178, -10, 106.282, 30.0556, -33.2148, 2.68024, -15, -12.6098, 67.9826, -8.27107, 61.0452, 16.6732, 12.5885, 86.2037, 39.2226, -44.3257, -21.7607, -1.8993, -8.27108, 111.726, 7.79557, 78.7659, -24.3824, 5.56306, 101.525, 33.6961, -63.3225, -19.0275, -14.7073, -8.91251, 36.3625, -10, 46.8846, 22.6692, 7.50132, 105.91, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 43.5285, -63.7269, -14.281, 43.9847, -3, 13.774, 3.04455, 108.474, -57.5089, -10, 122, 21.6125, -75, 10.4467, 31.0744, -3, 3.08575, 9.55721, 56.2415, -52.1769, -1.36035, 121.962, 37.9829, -15.2524, 13.1994, -3.33598, -37.1948, -9.38603, 5.93746, 49.5208, -24.9082, -6.53785, 100.854, 29.7714, -28.58, 12.2644, -11.4232, -12.6064, 81.061, -10, 46.4089, 13.5155, 20.2418, 99.3323, 43.8119, -48.9464, -4.57291, -5.61927, -5.31072, 109.071, 1.28891, 71.9279, -24.6777, 11.8762, 92.2117, 28.0751, -63.0635, -12.7639, -15, -5.35137, 52.5723, -1.90027, 42.0541, 34.4517, 0.673128, 94.6953, Average Score: 0 trial: 0, score: 5 trial: 1, score: 4 Policy 2: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Average Score: 4.5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 38.337, -69.969, 0.147581, 45, -3.40251, 38.6341, 0.586505, 112.038, -83.097, -6.02118, 115.986, 11.2133, -67.1303, 13.6195, 40.3727, -5.29989, -20.3307, 8.35494, 36.8904, -42.7914, -10, 116.747, 37.1825, -13.7762, 7.66095, -3.27783, -38.0427, 22.7824, -10, 58.553, -29.2135, -10, 103.576, 31.8551, -37.2105, -13.9723, -9.95176, -17.6958, 67.5019, -0.241433, 62.3065, -1.63391, 9.27134, 85.2203, 45.911, -41.8981, -33.7889, -2.54545, -9.57215, 118.276, 13.8456, 70.7956, -17.2791, 6.97154, 105.393, 33.3077, -65.2078, -8.86408, -15, -7.7189, 43.8492, -2.30945, 34.3151, 1.26409, 9.50235, 97.0163, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 37.7393, -59.3093, 6.65568, 45, -3.69337, 26.9854, 7.00372, 122, -77.6711, -5.98868, 111.495, 4.80675, -75, 17.1992, 38.4824, -3, -28.8718, 2.3716, 50.0394, -44.6241, -10, 122, 35.5901, -26.4467, 14.7024, 2.40958, -38.9277, 0.469342, -10, 57.3545, -2.62681, -10, 106.108, 28.98, -31.1419, 7.61783, -15, -11.5297, 87.7299, -10, 72.735, 23.699, 19.6639, 74.3569, 39.7602, -49.7453, -19.9083, -1.24463, -5.44973, 130, 15.1302, 72.0466, -19.6715, 15.0626, 93.653, 27.739, -58.2275, -19.5177, -12.1808, -12.465, 42.706, -10, 34.0789, 28.0061, 13.8666, 120.244, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Current Best Policy Score: 4.5 Policy 0 will be: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Policy 1 will be: 37.5086, -53.1763, 22.0253, 38.8164, -7.38971, 31.4546, -10, 114.291, -75.5591, -9.6713, 98.4703, 0, -74.0514, 24.121, 36.0101, -3, -46.7276, -10, 42.1987, -49.8478, -10, 122, 44.442, -22.8089, -26.1597, -4.96885, -37.9806, 21.1656, -7.91044, 74.859, 0.84261, 0.992032, 96.5661, 38.8527, -42.2717, 9.02917, -15, -15.0806, 79.2613, -9.20851, 77.6702, 10.5414, 3.74666, 83.6385, 35.5113, -43.0236, -39.5318, 1.51699, -5.66913, 108.348, 12.4293, 76.0853, -27.2993, 6.93787, 93.9445, 33.976, -64.6772, -34.5059, -12.3274, -3, 8.79757, -3.42304, 57.8742, 30.0804, 8.95472, 114.114, Policy 2 will be: 52.414, -61.5415, 7.69998, 45, -7.81858, 23.6855, -10, 101.151, -68.0282, -3.61243, 107.162, 4.3671, -71.6311, 1.45176, 39.9756, -3, -20.5589, -0.744189, 53.862, -60.2455, -10, 120.363, 39.2648, -22.8002, 2.3132, -3.05549, -38.443, 12.1546, -6.47513, 51.0869, -35.5997, -10, 101.441, 32.5895, -41.5611, 7.43505, -15, -12.7854, 56.3278, -8.22968, 52.13, 16.9434, 11.7339, 64.9137, 37.8776, -33.1843, -45.9377, 1.98063, -3, 110.945, 13.3764, 86.0214, -14.479, 4.11712, 103.77, 21.707, -62.0179, -31.7939, -13.9523, -3, 25.3806, -10, 38.3852, 52.106, -3.93244, 116.04, Policy 3 will be: 49.7241, -63.509, 14.4325, 37.0237, -7.32954, 16.1054, -0.460676, 107.628, -58.8439, -8.58255, 103.519, 14.2281, -73.2124, 1.27996, 33.116, -8.10895, -12.4237, -1.05742, 36.2653, -68.8209, 1.0125, 114.501, 40.0976, -22.495, -7.57418, -6.46189, -42.8073, -9.10007, -10, 71.1544, -19.5769, -8.23604, 115.686, 38.0097, -45.0179, -17.8925, -13.7678, -5.90179, 44.0773, -10, 54.1825, 2.95426, 18.433, 73.4149, 34.9493, -32.7514, -52.4108, -4.39828, -3, 122.181, 5.94832, 87.9295, -9.41643, 7.05255, 94.6798, 30.33, -64.736, -39.3046, -15, -5.51903, 21.8565, -3.32757, 42.2688, 54.9185, -6.73398, 103.604, Policy 4 will be: 42.754, -64.0478, 5.27666, 41.6523, -10.3116, 32.9012, 3.34492, 97.6244, -103.209, -8.47791, 111.874, 2.28202, -75, -2.34814, 29.9974, -3, -45.8832, -8.72653, 45.8981, -47.9914, 1.58885, 122, 47.4512, -20.3188, 2.41949, -4.63562, -40.0401, 18.8007, -10, 75.5903, -19.9669, -10, 92.4858, 25.5021, -34.1036, -5.42407, -10.9675, -11.1789, 64.0618, -10, 51.5455, 34.7199, 2.29752, 86.8326, 31.3766, -37.3398, -43.5418, 2.99387, -4.69081, 124.277, 23.3359, 89.1938, -8.29113, -3.11025, 102.148, 31.1871, -55.3596, -19.8982, -15, -3, 6.89138, -10, 47.9673, 40.5452, -1.41638, 122, trial: 0, score: 0 trial: 1, score: 4 Policy 0: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Average Score: 2 trial: 0, score: 1 trial: 1, score: 1 Policy 1: 37.5086, -53.1763, 22.0253, 38.8164, -7.38971, 31.4546, -10, 114.291, -75.5591, -9.6713, 98.4703, 0, -74.0514, 24.121, 36.0101, -3, -46.7276, -10, 42.1987, -49.8478, -10, 122, 44.442, -22.8089, -26.1597, -4.96885, -37.9806, 21.1656, -7.91044, 74.859, 0.84261, 0.992032, 96.5661, 38.8527, -42.2717, 9.02917, -15, -15.0806, 79.2613, -9.20851, 77.6702, 10.5414, 3.74666, 83.6385, 35.5113, -43.0236, -39.5318, 1.51699, -5.66913, 108.348, 12.4293, 76.0853, -27.2993, 6.93787, 93.9445, 33.976, -64.6772, -34.5059, -12.3274, -3, 8.79757, -3.42304, 57.8742, 30.0804, 8.95472, 114.114, Average Score: 1 trial: 0, score: 0 trial: 1, score: 4 Policy 2: 52.414, -61.5415, 7.69998, 45, -7.81858, 23.6855, -10, 101.151, -68.0282, -3.61243, 107.162, 4.3671, -71.6311, 1.45176, 39.9756, -3, -20.5589, -0.744189, 53.862, -60.2455, -10, 120.363, 39.2648, -22.8002, 2.3132, -3.05549, -38.443, 12.1546, -6.47513, 51.0869, -35.5997, -10, 101.441, 32.5895, -41.5611, 7.43505, -15, -12.7854, 56.3278, -8.22968, 52.13, 16.9434, 11.7339, 64.9137, 37.8776, -33.1843, -45.9377, 1.98063, -3, 110.945, 13.3764, 86.0214, -14.479, 4.11712, 103.77, 21.707, -62.0179, -31.7939, -13.9523, -3, 25.3806, -10, 38.3852, 52.106, -3.93244, 116.04, Average Score: 2 trial: 0, score: 2 trial: 1, score: 1 Policy 3: 49.7241, -63.509, 14.4325, 37.0237, -7.32954, 16.1054, -0.460676, 107.628, -58.8439, -8.58255, 103.519, 14.2281, -73.2124, 1.27996, 33.116, -8.10895, -12.4237, -1.05742, 36.2653, -68.8209, 1.0125, 114.501, 40.0976, -22.495, -7.57418, -6.46189, -42.8073, -9.10007, -10, 71.1544, -19.5769, -8.23604, 115.686, 38.0097, -45.0179, -17.8925, -13.7678, -5.90179, 44.0773, -10, 54.1825, 2.95426, 18.433, 73.4149, 34.9493, -32.7514, -52.4108, -4.39828, -3, 122.181, 5.94832, 87.9295, -9.41643, 7.05255, 94.6798, 30.33, -64.736, -39.3046, -15, -5.51903, 21.8565, -3.32757, 42.2688, 54.9185, -6.73398, 103.604, Average Score: 1.5 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 42.754, -64.0478, 5.27666, 41.6523, -10.3116, 32.9012, 3.34492, 97.6244, -103.209, -8.47791, 111.874, 2.28202, -75, -2.34814, 29.9974, -3, -45.8832, -8.72653, 45.8981, -47.9914, 1.58885, 122, 47.4512, -20.3188, 2.41949, -4.63562, -40.0401, 18.8007, -10, 75.5903, -19.9669, -10, 92.4858, 25.5021, -34.1036, -5.42407, -10.9675, -11.1789, 64.0618, -10, 51.5455, 34.7199, 2.29752, 86.8326, 31.3766, -37.3398, -43.5418, 2.99387, -4.69081, 124.277, 23.3359, 89.1938, -8.29113, -3.11025, 102.148, 31.1871, -55.3596, -19.8982, -15, -3, 6.89138, -10, 47.9673, 40.5452, -1.41638, 122, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Current Best Policy Score: 2 Policy 0 will be: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Policy 1 will be: 38.0008, -58.022, 20.3763, 44.1878, -10.3289, 41.1312, -8.49817, 103.886, -74.4601, 5.15582, 104.668, 0.620802, -69.1776, 1.63175, 30.8132, -6.87736, -23.6127, 1.25941, 36.165, -64.5344, -3.64871, 122, 49.2714, -17.6122, -10.845, -10.6459, -34.3835, 9.7825, -1.13823, 47.6565, -8.28366, -7.61652, 114.53, 29.4144, -32.6469, -24.0137, -10.3191, -14.689, 48.5035, -10, 72.8335, 8.71468, 3.21072, 71.3529, 29.3216, -40.8242, -27.0276, 4.52852, -7.36126, 122.221, 10.1646, 91.3189, 9.45522, -4.16733, 118.512, 34.7784, -59.6383, -15.6439, -15, -8.93626, 8.7242, -10, 39.0306, 33.8644, 2.14851, 110.556, Policy 2 will be: 48.9997, -64.6753, 1.92508, 43.5727, -10.1111, 57.5045, 2.92608, 114.001, -79.3834, -8.81016, 115.779, 12.4913, -75, 17.0079, 40.7733, -3.09681, -45.5285, -9.85773, 42.2078, -41.5164, -10, 122, 47.3558, -18.0814, -13.124, -2.84121, -39.4754, 1.45518, -10, 52.616, -29.4519, -10, 114.102, 34.6617, -43.9161, -4.18749, -15, -11.123, 45.792, -10, 57.892, 24.5946, 19.3457, 79.2878, 29.4789, -34.4162, -48.7195, 7.15141, -12.1757, 93.4386, 4.86781, 71.5047, -1.79385, 1.09792, 96.9218, 36.5336, -61.8055, -42.3253, -14.0163, -4.43615, 3.60944, -10, 44.779, 26.6927, -1.09743, 110.989, Policy 3 will be: 47.3355, -63.7706, 1.34931, 36.3894, -5.83568, 38.3978, -10, 107.019, -85.4431, -7.99946, 117.831, 15.3198, -75, 6.65716, 32.9583, -3, -14.6252, 3.43406, 38.7403, -51.7609, -4.61881, 119.046, 57.7839, -24.1187, -25.7969, -10.0256, -38.2308, -1.73858, -3.57439, 67.0902, -39.5037, 3.58129, 106.903, 26.6286, -41.8194, 8.99371, -9.35352, -12.7453, 80.4995, -10, 51.3325, 26.1543, 21.1723, 89.4304, 39.8435, -38.548, -20.3188, -4.24262, -4.4905, 107.692, 22.5801, 68.2416, -19.1582, -0.473553, 111.11, 16.9048, -61.7489, -23.1727, -15, -3, 36.6103, -6.97546, 41.7936, 27.3949, -9.0836, 122, Policy 4 will be: 41.5918, -65.4617, 1.15381, 42.1327, -3, 56.98, -1.27967, 122, -85.3557, 7.05405, 101.381, 3.54842, -70.2101, 13.5272, 36.4185, -6.06823, -38.6603, 5.49636, 53.7462, -65.9275, 3.52788, 121.104, 45.6135, -23.5837, -20.2631, -4.88459, -40.4494, 8.91575, -1.41405, 56.2044, -5.57148, -2.70655, 104.463, 20.3976, -36.2064, -7.77752, -10.8846, -13.5514, 50.3532, -10, 62.1417, 16.1115, 20.9005, 74.3147, 40.2555, -35.7896, -42.5377, -0.107212, -6.61119, 95.6719, 7.61095, 76.4703, -5.0514, -7.93834, 106.81, 21.309, -64.2014, -40.024, -12.5292, -3, 33.4272, -4.81779, 41.4181, 41.6601, -8.2553, 106.438, trial: 0, score: 4 trial: 1, score: 0 Policy 0: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Average Score: 2 trial: 0, score: 0 trial: 1, score: 3 Policy 1: 38.0008, -58.022, 20.3763, 44.1878, -10.3289, 41.1312, -8.49817, 103.886, -74.4601, 5.15582, 104.668, 0.620802, -69.1776, 1.63175, 30.8132, -6.87736, -23.6127, 1.25941, 36.165, -64.5344, -3.64871, 122, 49.2714, -17.6122, -10.845, -10.6459, -34.3835, 9.7825, -1.13823, 47.6565, -8.28366, -7.61652, 114.53, 29.4144, -32.6469, -24.0137, -10.3191, -14.689, 48.5035, -10, 72.8335, 8.71468, 3.21072, 71.3529, 29.3216, -40.8242, -27.0276, 4.52852, -7.36126, 122.221, 10.1646, 91.3189, 9.45522, -4.16733, 118.512, 34.7784, -59.6383, -15.6439, -15, -8.93626, 8.7242, -10, 39.0306, 33.8644, 2.14851, 110.556, Average Score: 1.5 trial: 0, score: 2 trial: 1, score: 2 Policy 2: 48.9997, -64.6753, 1.92508, 43.5727, -10.1111, 57.5045, 2.92608, 114.001, -79.3834, -8.81016, 115.779, 12.4913, -75, 17.0079, 40.7733, -3.09681, -45.5285, -9.85773, 42.2078, -41.5164, -10, 122, 47.3558, -18.0814, -13.124, -2.84121, -39.4754, 1.45518, -10, 52.616, -29.4519, -10, 114.102, 34.6617, -43.9161, -4.18749, -15, -11.123, 45.792, -10, 57.892, 24.5946, 19.3457, 79.2878, 29.4789, -34.4162, -48.7195, 7.15141, -12.1757, 93.4386, 4.86781, 71.5047, -1.79385, 1.09792, 96.9218, 36.5336, -61.8055, -42.3253, -14.0163, -4.43615, 3.60944, -10, 44.779, 26.6927, -1.09743, 110.989, Average Score: 2 trial: 0, score: 2 trial: 1, score: 1 Policy 3: 47.3355, -63.7706, 1.34931, 36.3894, -5.83568, 38.3978, -10, 107.019, -85.4431, -7.99946, 117.831, 15.3198, -75, 6.65716, 32.9583, -3, -14.6252, 3.43406, 38.7403, -51.7609, -4.61881, 119.046, 57.7839, -24.1187, -25.7969, -10.0256, -38.2308, -1.73858, -3.57439, 67.0902, -39.5037, 3.58129, 106.903, 26.6286, -41.8194, 8.99371, -9.35352, -12.7453, 80.4995, -10, 51.3325, 26.1543, 21.1723, 89.4304, 39.8435, -38.548, -20.3188, -4.24262, -4.4905, 107.692, 22.5801, 68.2416, -19.1582, -0.473553, 111.11, 16.9048, -61.7489, -23.1727, -15, -3, 36.6103, -6.97546, 41.7936, 27.3949, -9.0836, 122, Average Score: 1.5 trial: 0, score: 1 trial: 1, score: 1 Policy 4: 41.5918, -65.4617, 1.15381, 42.1327, -3, 56.98, -1.27967, 122, -85.3557, 7.05405, 101.381, 3.54842, -70.2101, 13.5272, 36.4185, -6.06823, -38.6603, 5.49636, 53.7462, -65.9275, 3.52788, 121.104, 45.6135, -23.5837, -20.2631, -4.88459, -40.4494, 8.91575, -1.41405, 56.2044, -5.57148, -2.70655, 104.463, 20.3976, -36.2064, -7.77752, -10.8846, -13.5514, 50.3532, -10, 62.1417, 16.1115, 20.9005, 74.3147, 40.2555, -35.7896, -42.5377, -0.107212, -6.61119, 95.6719, 7.61095, 76.4703, -5.0514, -7.93834, 106.81, 21.309, -64.2014, -40.024, -12.5292, -3, 33.4272, -4.81779, 41.4181, 41.6601, -8.2553, 106.438, Average Score: 1 --------------------------------- New Iteration Current Best Policy: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Current Best Policy Score: 2 Policy 0 will be: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Policy 1 will be: 52.4658, -59.1331, -0.838178, 38.2282, -3.47914, 50.3492, -5.74291, 105.091, -78.9, 2.60435, 105.093, 10.1024, -75, 10.0903, 32.8793, -9.89442, -6.92825, 1.55207, 48.81, -62.5042, -3.8541, 122, 38.7301, -15.5321, -1.86149, -2.67679, -33.0147, -4.75638, -10, 53.7162, -10.5196, -8.20194, 107.214, 21.2649, -43.9143, -8.97411, -12.6156, -9.86821, 55.3503, -10, 56.7377, 15.9935, 6.01197, 77.7352, 30.6766, -40.1187, -44.5935, -0.16079, -3.00517, 108.622, 15.3712, 67.1411, -11.7068, -3.2129, 114.091, 24.1407, -64.5811, -20.675, -15, -8.74402, 31.9047, -10, 47.0712, 32.2402, -8.44002, 122, Policy 2 will be: 41.8356, -63.192, 17.3123, 45, -3, 34.1423, -1.39677, 100.086, -57.3874, -0.444641, 113.598, 16.3075, -71.5538, 19.7316, 40.2619, -3, -24.4245, -8.26956, 34.5156, -25.7846, -6.1182, 117.296, 39.5012, -25.2658, 0.454939, -8.3778, -32.2056, -15.0848, -10, 66.238, -10.6553, 0.570578, 103.388, 24.2706, -35.8118, -15.9542, -15, -14.2491, 44.4704, -10, 72.2259, 16.5356, 2.47172, 74.5183, 41.2273, -46.0558, -49.6801, -3.32846, -3.29267, 90.8356, 18.6983, 78.3782, -25.4173, -1.36647, 104.766, 22.6473, -62.3854, -22.6563, -15, -3, 8.50748, -10, 58.5293, 15.2396, 1.9158, 99.5969, Policy 3 will be: 36.2503, -58.5455, 27.3668, 36.7894, -7.76507, 28.9785, -9.49965, 113.969, -81.2182, -10, 101.836, 1.31845, -73.0421, -3.12603, 30.5191, -3.21564, -25.0269, -1.11815, 43.515, -54.1381, -2.10964, 122, 43.5999, -16.146, -5.36744, -7.29919, -34.4844, 8.11776, -10, 55.7053, -8.45987, -4.43578, 108.175, 30.5678, -35.601, -9.19844, -15, -11.6318, 67.1066, -10, 60.429, 4.05289, 10.9336, 87.5634, 40.2688, -41.5124, -38.8283, 3.54311, -8.73939, 115.04, 8.15309, 83.4477, 5.62148, 2.53997, 91.5766, 33.9483, -66.841, -24.0198, -10.4997, -3, -10.6676, -10, 53.5181, 36.2565, -6.57526, 122, Policy 4 will be: 43.8195, -59.7407, 12.3847, 45, -3, 22.1241, -6.86426, 104.505, -62.1448, 3.7875, 113.822, 12.8671, -68.1426, 19.4426, 38.5377, -7.31349, -20.3109, -10, 54.672, -48.7088, -2.5195, 108.421, 50.5823, -24.5936, -10.9045, -13.1756, -39.3753, -21.7414, -10, 68.4086, -40.1721, -4.64786, 103.557, 39.198, -43.2879, -21.1953, -15, -3.87746, 41.1559, -10, 53.033, 40.2478, 9.07095, 71.0338, 41.333, -39.6593, -50.6921, 0.70012, -7.33252, 118.823, 18.0297, 73.4488, -13.1816, 6.26548, 96.6221, 33.3251, -55.4244, -12.0641, -10.5516, -3, 16.4793, -10, 36.5715, 19.0929, -3.3283, 103.444, trial: 0, score: 5 trial: 1, score: 6 Policy 0: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Average Score: 5.5 trial: 0, score: 2 trial: 1, score: 3 Policy 1: 52.4658, -59.1331, -0.838178, 38.2282, -3.47914, 50.3492, -5.74291, 105.091, -78.9, 2.60435, 105.093, 10.1024, -75, 10.0903, 32.8793, -9.89442, -6.92825, 1.55207, 48.81, -62.5042, -3.8541, 122, 38.7301, -15.5321, -1.86149, -2.67679, -33.0147, -4.75638, -10, 53.7162, -10.5196, -8.20194, 107.214, 21.2649, -43.9143, -8.97411, -12.6156, -9.86821, 55.3503, -10, 56.7377, 15.9935, 6.01197, 77.7352, 30.6766, -40.1187, -44.5935, -0.16079, -3.00517, 108.622, 15.3712, 67.1411, -11.7068, -3.2129, 114.091, 24.1407, -64.5811, -20.675, -15, -8.74402, 31.9047, -10, 47.0712, 32.2402, -8.44002, 122, Average Score: 2.5 trial: 0, score: 1 trial: 1, score: 4 Policy 2: 41.8356, -63.192, 17.3123, 45, -3, 34.1423, -1.39677, 100.086, -57.3874, -0.444641, 113.598, 16.3075, -71.5538, 19.7316, 40.2619, -3, -24.4245, -8.26956, 34.5156, -25.7846, -6.1182, 117.296, 39.5012, -25.2658, 0.454939, -8.3778, -32.2056, -15.0848, -10, 66.238, -10.6553, 0.570578, 103.388, 24.2706, -35.8118, -15.9542, -15, -14.2491, 44.4704, -10, 72.2259, 16.5356, 2.47172, 74.5183, 41.2273, -46.0558, -49.6801, -3.32846, -3.29267, 90.8356, 18.6983, 78.3782, -25.4173, -1.36647, 104.766, 22.6473, -62.3854, -22.6563, -15, -3, 8.50748, -10, 58.5293, 15.2396, 1.9158, 99.5969, Average Score: 2.5 trial: 0, score: 4 trial: 1, score: 3 Policy 3: 36.2503, -58.5455, 27.3668, 36.7894, -7.76507, 28.9785, -9.49965, 113.969, -81.2182, -10, 101.836, 1.31845, -73.0421, -3.12603, 30.5191, -3.21564, -25.0269, -1.11815, 43.515, -54.1381, -2.10964, 122, 43.5999, -16.146, -5.36744, -7.29919, -34.4844, 8.11776, -10, 55.7053, -8.45987, -4.43578, 108.175, 30.5678, -35.601, -9.19844, -15, -11.6318, 67.1066, -10, 60.429, 4.05289, 10.9336, 87.5634, 40.2688, -41.5124, -38.8283, 3.54311, -8.73939, 115.04, 8.15309, 83.4477, 5.62148, 2.53997, 91.5766, 33.9483, -66.841, -24.0198, -10.4997, -3, -10.6676, -10, 53.5181, 36.2565, -6.57526, 122, Average Score: 3.5 trial: 0, score: 1 trial: 1, score: 1 Policy 4: 43.8195, -59.7407, 12.3847, 45, -3, 22.1241, -6.86426, 104.505, -62.1448, 3.7875, 113.822, 12.8671, -68.1426, 19.4426, 38.5377, -7.31349, -20.3109, -10, 54.672, -48.7088, -2.5195, 108.421, 50.5823, -24.5936, -10.9045, -13.1756, -39.3753, -21.7414, -10, 68.4086, -40.1721, -4.64786, 103.557, 39.198, -43.2879, -21.1953, -15, -3.87746, 41.1559, -10, 53.033, 40.2478, 9.07095, 71.0338, 41.333, -39.6593, -50.6921, 0.70012, -7.33252, 118.823, 18.0297, 73.4488, -13.1816, 6.26548, 96.6221, 33.3251, -55.4244, -12.0641, -10.5516, -3, 16.4793, -10, 36.5715, 19.0929, -3.3283, 103.444, Average Score: 1 --------------------------------- New Iteration Current Best Policy: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Current Best Policy Score: 5.5 Policy 0 will be: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Policy 1 will be: 51.5724, -60.3952, 25.0926, 41.282, -6.54679, 30.9786, -5.80068, 99.3818, -82.9952, -3.88054, 119.71, 4.72394, -75, 7.53149, 35.8201, -3, -29.1159, -6.55411, 38.3256, -24.5431, -3.19475, 122, 56.9592, -24.7795, 2.41713, -8.9372, -40.5642, -1.32977, -10, 48.2689, -36.6366, 1.59603, 93.3183, 23.8268, -34.9074, -10.4529, -12.8999, -14.47, 69.9438, -8.58026, 66.0391, 29.9717, 17.3997, 73.0822, 25.2931, -42.1751, -33.5009, 1.6945, -8.85107, 118.216, 7.8825, 79.9314, -3.30238, 0.501785, 98.1158, 36.221, -66.0607, -41.772, -9.72361, -7.07899, 7.30262, -10, 54.8146, 37.296, -9.11188, 96.0502, Policy 2 will be: 39.4633, -65.4954, 0.628481, 39.8097, -4.66816, 33.8562, -10, 105.281, -87.7675, 0.522014, 99.6713, 6.46049, -75, 31.0096, 29.3471, -3, -41.2756, 2.75235, 47.4252, -26.5152, -7.77607, 122, 54.7876, -27.423, 0.534915, -9.55142, -34.2679, 18.0806, -1.55692, 51.5952, -5.68887, -8.38525, 104.004, 21.7193, -39.9985, 8.84491, -15, -7.89056, 59.2244, -10, 76.911, 7.68373, 5.79023, 82.0893, 32.2774, -45.1084, -40.4289, 3.52102, -4.77774, 78.5319, 6.06518, 76.3785, -35.2222, -5.33327, 119.019, 32.6282, -63.7236, -27.33, -15, -3.24191, 29.3471, -5.52929, 30.7312, 55.3103, 5.32464, 121.659, Policy 3 will be: 38.7062, -54.9864, -0.993425, 45, -3, 17.2152, -1.02261, 113.104, -96.5301, 6.01163, 101.839, 7.49578, -69.6634, 18.5389, 38.4276, -3.30031, -23.4652, 5.73459, 47.2551, -51.7314, -10, 122, 58.1136, -24.86, -20.5572, -13.7012, -37.45, 6.57297, -10, 56.3715, -24.9357, 0.663669, 98.2369, 32.5107, -44.4781, 8.43231, -15, -4.93723, 74.2518, -10, 58.2316, 45.3027, 8.27121, 77.3391, 33.9384, -37.2988, -51.5753, -3.57625, -8.64307, 107.14, 20.1241, 84.4449, -29.46, -2.93213, 99.9032, 35.6586, -65.2977, -9.0716, -12.2635, -8.06259, -7.38737, -10, 43.9172, 43.6951, 1.24186, 111.045, Policy 4 will be: 38.9122, -54.8934, -5.48575, 45, -6.97707, 24.6026, 0.428121, 115.116, -81.9803, -1.76042, 111.036, 0, -74.8288, -1.68333, 36.8048, -3, -16.2938, 1.44302, 41.8485, -57.2909, -0.563921, 122, 54.1357, -24.1105, -4.77712, -6.15266, -38.3348, -25.176, -2.77752, 54.4719, -37.2854, 1.09075, 97.592, 36.2546, -37.5655, -8.46437, -14.3369, -9.83981, 68.4082, -10, 57.561, 20.9738, 4.96954, 75.3468, 28.8992, -45.7304, -53.1871, -0.472817, -3.60546, 112.112, 19.4185, 87.3656, -32.6603, -8.23313, 99.497, 23.3041, -58.9672, -22.1405, -15, -3, 30.4675, -3.36706, 31.4404, 33.406, 8.25002, 114.442, trial: 0, score: 4 trial: 1, score: 4 Policy 0: 42.9303, -60.125, 11.6446, 42.0173, -5.10108, 38.9212, -5.09341, 110.871, -79.5325, -1.53698, 112.096, 7.04622, -75, 13.728, 34.8092, -4.4307, -28.6556, -3.48669, 49.1341, -47.9091, -6.21798, 121.428, 48.4755, -22.4018, -13.324, -7.79313, -37.5822, -1.77775, -10, 60.9889, -21.5044, -5.73478, 103.287, 30.1153, -38.0121, -6.98133, -15, -9.62056, 65.1488, -10, 64.1833, 25.2953, 11.4916, 77.3419, 34.3884, -39.2739, -37.2498, 1.458, -7.08648, 100.365, 14.3327, 77.3175, -12.4471, 1.46262, 105.913, 26.6951, -62.6895, -25.1528, -15, -3.38494, 12.3829, -10, 44.7552, 35.5792, 0.303144, 109.763, Average Score: 4 trial: 0, score: 5 trial: 1, score: 7 Policy 1: 51.5724, -60.3952, 25.0926, 41.282, -6.54679, 30.9786, -5.80068, 99.3818, -82.9952, -3.88054, 119.71, 4.72394, -75, 7.53149, 35.8201, -3, -29.1159, -6.55411, 38.3256, -24.5431, -3.19475, 122, 56.9592, -24.7795, 2.41713, -8.9372, -40.5642, -1.32977, -10, 48.2689, -36.6366, 1.59603, 93.3183, 23.8268, -34.9074, -10.4529, -12.8999, -14.47, 69.9438, -8.58026, 66.0391, 29.9717, 17.3997, 73.0822, 25.2931, -42.1751, -33.5009, 1.6945, -8.85107, 118.216, 7.8825, 79.9314, -3.30238, 0.501785, 98.1158, 36.221, -66.0607, -41.772, -9.72361, -7.07899, 7.30262, -10, 54.8146, 37.296, -9.11188, 96.0502, Average Score: 6 trial: 0, score: 4 trial: 1, score: 5 Policy 2: 39.4633, -65.4954, 0.628481, 39.8097, -4.66816, 33.8562, -10, 105.281, -87.7675, 0.522014, 99.6713, 6.46049, -75, 31.0096, 29.3471, -3, -41.2756, 2.75235, 47.4252, -26.5152, -7.77607, 122, 54.7876, -27.423, 0.534915, -9.55142, -34.2679, 18.0806, -1.55692, 51.5952, -5.68887, -8.38525, 104.004, 21.7193, -39.9985, 8.84491, -15, -7.89056, 59.2244, -10, 76.911, 7.68373, 5.79023, 82.0893, 32.2774, -45.1084, -40.4289, 3.52102, -4.77774, 78.5319, 6.06518, 76.3785, -35.2222, -5.33327, 119.019, 32.6282, -63.7236, -27.33, -15, -3.24191, 29.3471, -5.52929, 30.7312, 55.3103, 5.32464, 121.659, Average Score: 4.5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 38.7062, -54.9864, -0.993425, 45, -3, 17.2152, -1.02261, 113.104, -96.5301, 6.01163, 101.839, 7.49578, -69.6634, 18.5389, 38.4276, -3.30031, -23.4652, 5.73459, 47.2551, -51.7314, -10, 122, 58.1136, -24.86, -20.5572, -13.7012, -37.45, 6.57297, -10, 56.3715, -24.9357, 0.663669, 98.2369, 32.5107, -44.4781, 8.43231, -15, -4.93723, 74.2518, -10, 58.2316, 45.3027, 8.27121, 77.3391, 33.9384, -37.2988, -51.5753, -3.57625, -8.64307, 107.14, 20.1241, 84.4449, -29.46, -2.93213, 99.9032, 35.6586, -65.2977, -9.0716, -12.2635, -8.06259, -7.38737, -10, 43.9172, 43.6951, 1.24186, 111.045, Average Score: 0 trial: 0, score: 0 trial: 1, score: 3 Policy 4: 38.9122, -54.8934, -5.48575, 45, -6.97707, 24.6026, 0.428121, 115.116, -81.9803, -1.76042, 111.036, 0, -74.8288, -1.68333, 36.8048, -3, -16.2938, 1.44302, 41.8485, -57.2909, -0.563921, 122, 54.1357, -24.1105, -4.77712, -6.15266, -38.3348, -25.176, -2.77752, 54.4719, -37.2854, 1.09075, 97.592, 36.2546, -37.5655, -8.46437, -14.3369, -9.83981, 68.4082, -10, 57.561, 20.9738, 4.96954, 75.3468, 28.8992, -45.7304, -53.1871, -0.472817, -3.60546, 112.112, 19.4185, 87.3656, -32.6603, -8.23313, 99.497, 23.3041, -58.9672, -22.1405, -15, -3, 30.4675, -3.36706, 31.4404, 33.406, 8.25002, 114.442, Average Score: 1.5 --------------------------------- New Iteration Current Best Policy: 51.5724, -60.3952, 25.0926, 41.282, -6.54679, 30.9786, -5.80068, 99.3818, -82.9952, -3.88054, 119.71, 4.72394, -75, 7.53149, 35.8201, -3, -29.1159, -6.55411, 38.3256, -24.5431, -3.19475, 122, 56.9592, -24.7795, 2.41713, -8.9372, -40.5642, -1.32977, -10, 48.2689, -36.6366, 1.59603, 93.3183, 23.8268, -34.9074, -10.4529, -12.8999, -14.47, 69.9438, -8.58026, 66.0391, 29.9717, 17.3997, 73.0822, 25.2931, -42.1751, -33.5009, 1.6945, -8.85107, 118.216, 7.8825, 79.9314, -3.30238, 0.501785, 98.1158, 36.221, -66.0607, -41.772, -9.72361, -7.07899, 7.30262, -10, 54.8146, 37.296, -9.11188, 96.0502, Current Best Policy Score: 6 Policy 0 will be: 51.5724, -60.3952, 25.0926, 41.282, -6.54679, 30.9786, -5.80068, 99.3818, -82.9952, -3.88054, 119.71, 4.72394, -75, 7.53149, 35.8201, -3, -29.1159, -6.55411, 38.3256, -24.5431, -3.19475, 122, 56.9592, -24.7795, 2.41713, -8.9372, -40.5642, -1.32977, -10, 48.2689, -36.6366, 1.59603, 93.3183, 23.8268, -34.9074, -10.4529, -12.8999, -14.47, 69.9438, -8.58026, 66.0391, 29.9717, 17.3997, 73.0822, 25.2931, -42.1751, -33.5009, 1.6945, -8.85107, 118.216, 7.8825, 79.9314, -3.30238, 0.501785, 98.1158, 36.221, -66.0607, -41.772, -9.72361, -7.07899, 7.30262, -10, 54.8146, 37.296, -9.11188, 96.0502, Policy 1 will be: 54.2212, -64.1355, 24.6441, 43.4798, -7.32176, 12.1329, -7.02978, 110.211, -80.9173, -7.99291, 112.245, 7.48108, -75, -9.57445, 31.7369, -7.08054, -27.6969, 1.12038, 36.0365, -10.2022, -0.881708, 122, 66.1938, -27.8175, -3.61163, -10.1205, -37.71, -3.87908, -5.31172, 39.8488, -21.1634, -4.68177, 87.6485, 27.4477, -37.0228, -21.0456, -7.24423, -16.6766, 53.7102, -10, 55.2049, 45.4829, 16.5254, 69.3841, 31.817, -41.9787, -24.7896, 0.811406, -3.46091, 126.159, 5.12957, 69.0303, 10.2149, -3.67472, 109.148, 28.648, -63.7015, -39.3813, -4.09097, -9.94277, 24.9707, -9.71331, 47.5958, 18.7471, -10, 81.6306, Policy 2 will be: 43.0732, -55.4532, 41.051, 45, -7.93411, 46.9096, -10, 107.214, -83.8124, -6.34955, 122, 12.3693, -67.8784, -0.0158558, 35.9532, -3.57394, -29.0895, -2.06652, 50.3021, -31.3854, 3.06626, 112.01, 65.908, -26.9144, -13.6482, -4.17435, -45.4629, 5.09245, -10, 59.2692, -59.2199, 8.1594, 104.652, 21.4332, -33.7425, -9.08641, -15, -9.49713, 60.0239, -10, 75.4908, 20.7883, 8.49344, 61.0548, 31.5196, -39.2103, -41.007, -0.270561, -11.7365, 130, 3.99624, 66.382, -25.0933, 1.05955, 87.5271, 33.0359, -67.0498, -57.306, -5.48823, -3, 22.3197, -7.55826, 51.6819, 57.5735, -9.87581, 104.878, Policy 3 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 4 will be: 58.3094, -58.7146, 35.4486, 43.798, -10.2241, 8.70338, -3.98016, 88.0284, -76.0237, 2.35772, 113.583, 11.3567, -75, -9.89793, 40.9447, -5.07687, -29.7881, -9.85491, 41.7067, -28.3991, -0.203493, 122, 51.8481, -28.2996, 8.48075, -8.02624, -44.0452, -10.169, -10, 58.0146, -37.9712, -0.262772, 106.267, 28.5873, -40.9151, -17.6445, -8.02079, -19.8579, 90.2617, -3.90101, 63.566, 21.1241, 22.0157, 80.3408, 20.8611, -41.486, -44.5797, 4.37512, -13.4788, 120.724, 1.74684, 83.6939, -1.74146, -1.92172, 91.981, 39.1688, -73.329, -24.4852, -4.69292, -11.1085, 2.36784, -10, 59.1647, 31.9706, -8.34174, 92.9653, trial: 0, score: 5 trial: 1, score: 6 Policy 0: 51.5724, -60.3952, 25.0926, 41.282, -6.54679, 30.9786, -5.80068, 99.3818, -82.9952, -3.88054, 119.71, 4.72394, -75, 7.53149, 35.8201, -3, -29.1159, -6.55411, 38.3256, -24.5431, -3.19475, 122, 56.9592, -24.7795, 2.41713, -8.9372, -40.5642, -1.32977, -10, 48.2689, -36.6366, 1.59603, 93.3183, 23.8268, -34.9074, -10.4529, -12.8999, -14.47, 69.9438, -8.58026, 66.0391, 29.9717, 17.3997, 73.0822, 25.2931, -42.1751, -33.5009, 1.6945, -8.85107, 118.216, 7.8825, 79.9314, -3.30238, 0.501785, 98.1158, 36.221, -66.0607, -41.772, -9.72361, -7.07899, 7.30262, -10, 54.8146, 37.296, -9.11188, 96.0502, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 5 Policy 1: 54.2212, -64.1355, 24.6441, 43.4798, -7.32176, 12.1329, -7.02978, 110.211, -80.9173, -7.99291, 112.245, 7.48108, -75, -9.57445, 31.7369, -7.08054, -27.6969, 1.12038, 36.0365, -10.2022, -0.881708, 122, 66.1938, -27.8175, -3.61163, -10.1205, -37.71, -3.87908, -5.31172, 39.8488, -21.1634, -4.68177, 87.6485, 27.4477, -37.0228, -21.0456, -7.24423, -16.6766, 53.7102, -10, 55.2049, 45.4829, 16.5254, 69.3841, 31.817, -41.9787, -24.7896, 0.811406, -3.46091, 126.159, 5.12957, 69.0303, 10.2149, -3.67472, 109.148, 28.648, -63.7015, -39.3813, -4.09097, -9.94277, 24.9707, -9.71331, 47.5958, 18.7471, -10, 81.6306, Average Score: 2.5 trial: 0, score: 4 trial: 1, score: 5 Policy 2: 43.0732, -55.4532, 41.051, 45, -7.93411, 46.9096, -10, 107.214, -83.8124, -6.34955, 122, 12.3693, -67.8784, -0.0158558, 35.9532, -3.57394, -29.0895, -2.06652, 50.3021, -31.3854, 3.06626, 112.01, 65.908, -26.9144, -13.6482, -4.17435, -45.4629, 5.09245, -10, 59.2692, -59.2199, 8.1594, 104.652, 21.4332, -33.7425, -9.08641, -15, -9.49713, 60.0239, -10, 75.4908, 20.7883, 8.49344, 61.0548, 31.5196, -39.2103, -41.007, -0.270561, -11.7365, 130, 3.99624, 66.382, -25.0933, 1.05955, 87.5271, 33.0359, -67.0498, -57.306, -5.48823, -3, 22.3197, -7.55826, 51.6819, 57.5735, -9.87581, 104.878, Average Score: 4.5 trial: 0, score: 7 trial: 1, score: 7 Policy 3: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 7 trial: 0, score: 5 trial: 1, score: 5 Policy 4: 58.3094, -58.7146, 35.4486, 43.798, -10.2241, 8.70338, -3.98016, 88.0284, -76.0237, 2.35772, 113.583, 11.3567, -75, -9.89793, 40.9447, -5.07687, -29.7881, -9.85491, 41.7067, -28.3991, -0.203493, 122, 51.8481, -28.2996, 8.48075, -8.02624, -44.0452, -10.169, -10, 58.0146, -37.9712, -0.262772, 106.267, 28.5873, -40.9151, -17.6445, -8.02079, -19.8579, 90.2617, -3.90101, 63.566, 21.1241, 22.0157, 80.3408, 20.8611, -41.486, -44.5797, 4.37512, -13.4788, 120.724, 1.74684, 83.6939, -1.74146, -1.92172, 91.981, 39.1688, -73.329, -24.4852, -4.69292, -11.1085, 2.36784, -10, 59.1647, 31.9706, -8.34174, 92.9653, Average Score: 5 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 7 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 38.0725, -68.1152, 44.5128, 45, -5.76192, 25.7252, -10, 110.334, -102.884, -8.58585, 108.177, 7.46806, -69.0078, -11.889, 34.0701, -5.3274, -51.132, -10, 26.3, -43.1758, 2.56933, 110.708, 46.7343, -27.5734, -21.0311, -11.499, -38.4883, -24.7781, -10, 35.6552, -40.2892, 2.14298, 99.8784, 36.3314, -43.0296, 2.20578, -14.1258, -7.6177, 65.6342, -7.96967, 81.6498, 12.8065, 2.33851, 95.4102, 27.5961, -40.7173, -38.4855, 1.70068, -14.4402, 130, -0.0844071, 76.3942, -11.4986, -4.21628, 102.925, 38.9199, -63.9528, -18.8614, -1.57767, -5.3572, -11.5142, -1.33363, 36.5521, 52.8731, -10, 93.013, Policy 2 will be: 54.1705, -70.7748, 46.8568, 45, -9.26929, 44.4553, -10, 114.472, -93.0533, -0.301925, 118.412, 13.6107, -68.2209, -9.23198, 38.0377, -3, -37.5757, -10, 40.1188, -23.5351, -7.15121, 108.853, 54.9812, -24.9799, -19.7643, -12.9955, -33.2416, -8.81798, -2.87473, 60.6284, -67.1865, 4.36618, 95.4488, 31.5255, -39.1052, 1.64125, -4.889, -6.5465, 58.4973, 0.162223, 79.846, 39.4822, 7.29602, 69.0225, 30.7233, -50.8997, -20.7409, -5.57698, -17.5444, 85.4175, 7.11736, 96.5939, -5.02872, -1.09249, 102.793, 48.4518, -68.8982, -13.776, -9.10309, -7.03658, -16.1486, -2.59027, 40.5746, 39.9649, -10, 79.0412, Policy 3 will be: 41.7062, -63.0739, 48.7281, 45, -13.5654, 6.07111, -3.10836, 104.571, -78.258, -9.55192, 119.733, 2.42908, -72.9943, 0.550074, 30.7664, -3, -59.0981, -0.679083, 28.8507, -41.3936, -9.73397, 112.944, 55.2952, -27.843, -21.0773, -9.44397, -31.1839, -7.80522, -10, 46.3324, -57.2092, -9.76041, 113.115, 30.9178, -33.4961, 3.30227, -8.7883, -8.30444, 33.2248, 3.23571, 79.3443, 2.77454, 19.0352, 78.6103, 18.6877, -48.155, -26.4399, -3.76876, -14.5378, 97.7394, 2.57039, 93.0052, -7.56469, 2.90001, 108.296, 46.6844, -63.3495, -44.9151, -6.67088, -9.87246, 23.8692, -7.47332, 33.4777, 7.39658, -2.69496, 96.2422, Policy 4 will be: 42.1784, -64.3742, 51.4757, 45, -10.142, 7.6478, -5.61903, 117.788, -105.057, -10, 122, 19.3377, -73.2584, -11.9345, 33.8874, -3, -55.7569, -10, 20.1428, -53.8332, -7.64955, 121.727, 44.3889, -23.7065, -23.8452, -11.3504, -29.7294, -43.8393, -10, 60.6396, -24.5597, 1.31661, 108.369, 18.8544, -37.1296, 14.6421, -12.6151, -5.16162, 54.7267, -7.11493, 73.2351, 10.3404, 14.5849, 78.271, 17.5511, -42.2792, -33.0023, 1.00823, -17.0908, 128.554, 12.2399, 72.135, -22.3339, 3.06451, 109.139, 34.3321, -68.9374, -12.0861, -10.9712, -3, 11.8942, -10, 37.4938, 44.4137, -2.94245, 88.7163, trial: 0, score: 7 trial: 1, score: 7 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 7 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 38.0725, -68.1152, 44.5128, 45, -5.76192, 25.7252, -10, 110.334, -102.884, -8.58585, 108.177, 7.46806, -69.0078, -11.889, 34.0701, -5.3274, -51.132, -10, 26.3, -43.1758, 2.56933, 110.708, 46.7343, -27.5734, -21.0311, -11.499, -38.4883, -24.7781, -10, 35.6552, -40.2892, 2.14298, 99.8784, 36.3314, -43.0296, 2.20578, -14.1258, -7.6177, 65.6342, -7.96967, 81.6498, 12.8065, 2.33851, 95.4102, 27.5961, -40.7173, -38.4855, 1.70068, -14.4402, 130, -0.0844071, 76.3942, -11.4986, -4.21628, 102.925, 38.9199, -63.9528, -18.8614, -1.57767, -5.3572, -11.5142, -1.33363, 36.5521, 52.8731, -10, 93.013, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 54.1705, -70.7748, 46.8568, 45, -9.26929, 44.4553, -10, 114.472, -93.0533, -0.301925, 118.412, 13.6107, -68.2209, -9.23198, 38.0377, -3, -37.5757, -10, 40.1188, -23.5351, -7.15121, 108.853, 54.9812, -24.9799, -19.7643, -12.9955, -33.2416, -8.81798, -2.87473, 60.6284, -67.1865, 4.36618, 95.4488, 31.5255, -39.1052, 1.64125, -4.889, -6.5465, 58.4973, 0.162223, 79.846, 39.4822, 7.29602, 69.0225, 30.7233, -50.8997, -20.7409, -5.57698, -17.5444, 85.4175, 7.11736, 96.5939, -5.02872, -1.09249, 102.793, 48.4518, -68.8982, -13.776, -9.10309, -7.03658, -16.1486, -2.59027, 40.5746, 39.9649, -10, 79.0412, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 41.7062, -63.0739, 48.7281, 45, -13.5654, 6.07111, -3.10836, 104.571, -78.258, -9.55192, 119.733, 2.42908, -72.9943, 0.550074, 30.7664, -3, -59.0981, -0.679083, 28.8507, -41.3936, -9.73397, 112.944, 55.2952, -27.843, -21.0773, -9.44397, -31.1839, -7.80522, -10, 46.3324, -57.2092, -9.76041, 113.115, 30.9178, -33.4961, 3.30227, -8.7883, -8.30444, 33.2248, 3.23571, 79.3443, 2.77454, 19.0352, 78.6103, 18.6877, -48.155, -26.4399, -3.76876, -14.5378, 97.7394, 2.57039, 93.0052, -7.56469, 2.90001, 108.296, 46.6844, -63.3495, -44.9151, -6.67088, -9.87246, 23.8692, -7.47332, 33.4777, 7.39658, -2.69496, 96.2422, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 42.1784, -64.3742, 51.4757, 45, -10.142, 7.6478, -5.61903, 117.788, -105.057, -10, 122, 19.3377, -73.2584, -11.9345, 33.8874, -3, -55.7569, -10, 20.1428, -53.8332, -7.64955, 121.727, 44.3889, -23.7065, -23.8452, -11.3504, -29.7294, -43.8393, -10, 60.6396, -24.5597, 1.31661, 108.369, 18.8544, -37.1296, 14.6421, -12.6151, -5.16162, 54.7267, -7.11493, 73.2351, 10.3404, 14.5849, 78.271, 17.5511, -42.2792, -33.0023, 1.00823, -17.0908, 128.554, 12.2399, 72.135, -22.3339, 3.06451, 109.139, 34.3321, -68.9374, -12.0861, -10.9712, -3, 11.8942, -10, 37.4938, 44.4137, -2.94245, 88.7163, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 7 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 51.1782, -69.8748, 53.8477, 45, -7.4502, 33.4915, -10, 113.811, -83.9679, -7.90749, 122, 6.52905, -74.4034, 4.4316, 30.6313, -5.7248, -24.3393, -10, 23.5181, -30.6788, -1.06027, 107.958, 44.6264, -31.9894, -12.149, -15, -30.5632, -22.0487, -10, 48.2557, -28.8943, 3.8529, 107.798, 19.7886, -38.2091, -15.27, -10.7936, -12.1074, 59.4304, -4.969, 69.806, 7.01041, 8.87012, 82.3943, 16.9558, -50.422, -9.97781, 1.01791, -11.9879, 116.881, -5.12152, 89.7461, -27.3653, -0.282521, 103.47, 34.4815, -61.4432, -15.9602, -3.12608, -12.0018, -4.76383, 0.102992, 54.3481, 21.2289, -1.70201, 91.995, Policy 2 will be: 46.4804, -66.1279, 24.861, 45, -12.5662, 18.2349, -2.32779, 98.0204, -73.8207, -4.59509, 122, 2.18599, -75, -3.47938, 36.7393, -5.35496, -22.8755, -10, 27.5334, -10.8532, -3.14212, 99.3462, 41.9866, -23.3109, -6.40591, -15, -37.3345, -40.8845, -10, 47.3893, -56.4022, -10, 102.167, 31.9849, -41.2399, 13.178, -12.1278, -11.5997, 65.3158, -6.81839, 54.041, 32.1951, 5.69117, 93.4518, 16.3827, -49.4302, -30.3696, -1.1658, -11.9504, 127.857, 6.97285, 76.4035, -13.2265, 2.37955, 115.99, 47.2593, -69.6495, -19.0047, -2.44393, -3, -2.29675, -6.88776, 32.9167, 41.9209, -10, 94.1562, Policy 3 will be: 55.3339, -61.1265, 45.763, 42.5468, -8.00365, 4.37545, -0.559797, 122, -57.0659, -10, 114.362, 0.281827, -75, 10.6876, 32.6449, -6.4625, -22.4216, -5.96001, 17.2128, -35.8611, -7.44116, 122, 43.478, -33.9779, -8.20258, -11.8441, -34.0211, -45.8552, -7.87614, 58.3919, -24.4418, -4.75895, 113.184, 37.2174, -35.8973, 11.6649, -10.1032, -4.22209, 35.515, -6.97294, 78.3722, 2.98737, 6.97556, 95.5067, 23.8206, -50.266, -19.4376, -7.38852, -10.8636, 105.235, 10.9373, 75.7108, -3.5288, -10, 93.222, 44.2351, -69.037, -28.1193, -10.4046, -10.8091, 15.0673, -10, 45.3169, 9.57843, -9.8503, 92.4835, Policy 4 will be: 47.8447, -72.2919, 32.4305, 41.6183, -4.87708, 29.0945, -9.85069, 112.62, -78.8951, -0.693545, 122, 6.19027, -75, -4.13662, 33.6588, -3.38494, -38.7585, -7.90734, 38.9299, -27.7598, -9.82648, 109.549, 48.2165, -23.5854, -28.2332, -15, -36.5074, -8.75856, -10, 54.9831, -56.8377, 3.04579, 104.322, 19.9194, -42.4797, 8.6346, -7.2916, -9.88253, 76.2981, -5.94681, 74.3642, 3.48436, 17.5838, 96.674, 32.3128, -42.2047, -33.1536, -3.8336, -10.7954, 93.7377, 10.9934, 79.2904, -12.99, 3.7663, 117.734, 39.4337, -62.3211, -18.6267, -12.4655, -3.38417, 2.45151, 2.32756, 55.2153, 11.213, -2.96933, 100.241, trial: 0, score: 7 trial: 1, score: 8 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 7.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 51.1782, -69.8748, 53.8477, 45, -7.4502, 33.4915, -10, 113.811, -83.9679, -7.90749, 122, 6.52905, -74.4034, 4.4316, 30.6313, -5.7248, -24.3393, -10, 23.5181, -30.6788, -1.06027, 107.958, 44.6264, -31.9894, -12.149, -15, -30.5632, -22.0487, -10, 48.2557, -28.8943, 3.8529, 107.798, 19.7886, -38.2091, -15.27, -10.7936, -12.1074, 59.4304, -4.969, 69.806, 7.01041, 8.87012, 82.3943, 16.9558, -50.422, -9.97781, 1.01791, -11.9879, 116.881, -5.12152, 89.7461, -27.3653, -0.282521, 103.47, 34.4815, -61.4432, -15.9602, -3.12608, -12.0018, -4.76383, 0.102992, 54.3481, 21.2289, -1.70201, 91.995, Average Score: 0 trial: 0, score: 8 trial: 1, score: 0 Policy 2: 46.4804, -66.1279, 24.861, 45, -12.5662, 18.2349, -2.32779, 98.0204, -73.8207, -4.59509, 122, 2.18599, -75, -3.47938, 36.7393, -5.35496, -22.8755, -10, 27.5334, -10.8532, -3.14212, 99.3462, 41.9866, -23.3109, -6.40591, -15, -37.3345, -40.8845, -10, 47.3893, -56.4022, -10, 102.167, 31.9849, -41.2399, 13.178, -12.1278, -11.5997, 65.3158, -6.81839, 54.041, 32.1951, 5.69117, 93.4518, 16.3827, -49.4302, -30.3696, -1.1658, -11.9504, 127.857, 6.97285, 76.4035, -13.2265, 2.37955, 115.99, 47.2593, -69.6495, -19.0047, -2.44393, -3, -2.29675, -6.88776, 32.9167, 41.9209, -10, 94.1562, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 55.3339, -61.1265, 45.763, 42.5468, -8.00365, 4.37545, -0.559797, 122, -57.0659, -10, 114.362, 0.281827, -75, 10.6876, 32.6449, -6.4625, -22.4216, -5.96001, 17.2128, -35.8611, -7.44116, 122, 43.478, -33.9779, -8.20258, -11.8441, -34.0211, -45.8552, -7.87614, 58.3919, -24.4418, -4.75895, 113.184, 37.2174, -35.8973, 11.6649, -10.1032, -4.22209, 35.515, -6.97294, 78.3722, 2.98737, 6.97556, 95.5067, 23.8206, -50.266, -19.4376, -7.38852, -10.8636, 105.235, 10.9373, 75.7108, -3.5288, -10, 93.222, 44.2351, -69.037, -28.1193, -10.4046, -10.8091, 15.0673, -10, 45.3169, 9.57843, -9.8503, 92.4835, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 47.8447, -72.2919, 32.4305, 41.6183, -4.87708, 29.0945, -9.85069, 112.62, -78.8951, -0.693545, 122, 6.19027, -75, -4.13662, 33.6588, -3.38494, -38.7585, -7.90734, 38.9299, -27.7598, -9.82648, 109.549, 48.2165, -23.5854, -28.2332, -15, -36.5074, -8.75856, -10, 54.9831, -56.8377, 3.04579, 104.322, 19.9194, -42.4797, 8.6346, -7.2916, -9.88253, 76.2981, -5.94681, 74.3642, 3.48436, 17.5838, 96.674, 32.3128, -42.2047, -33.1536, -3.8336, -10.7954, 93.7377, 10.9934, 79.2904, -12.99, 3.7663, 117.734, 39.4337, -62.3211, -18.6267, -12.4655, -3.38417, 2.45151, 2.32756, 55.2153, 11.213, -2.96933, 100.241, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 7.5 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 44.4834, -61.9913, 42.2436, 45, -13.1009, 11.0647, -1.21196, 103.501, -66.9435, -7.26168, 122, 6.62483, -75, 11.2017, 33.738, -6.8932, -45.1597, -10, 32.7912, -30.6848, -4.48807, 109.188, 45.2694, -35.381, -4.8239, -15, -29.8634, -26.7458, -8.97164, 42.164, -46.5424, -10, 103.451, 21.6695, -41.958, 9.84077, -6.90227, -8.86454, 32.6548, -0.453706, 55.2614, 22.7533, 11.4373, 74.8896, 29.6846, -47.3372, -21.9896, -7.48478, -8.34598, 91.7443, 5.74177, 94.6965, 9.88331, -2.41991, 95.1871, 46.5323, -69.1021, -37.9528, -8.14001, -3, -24.0262, -10, 39.0981, 41.8013, -6.78356, 97.2662, Policy 2 will be: 39.4605, -67.0827, 44.7442, 45, -10.8968, 44.5049, -3.89155, 114.204, -66.2874, -10, 113.523, 4.95238, -71.6462, -9.566, 39.6631, -5.27707, -68.4363, -1.57724, 36.7558, -48.2608, -10, 106.386, 48.3844, -34.8987, -32.2786, -15, -32.3142, -35.372, -10, 47.7102, -61.1041, -6.35992, 96.6703, 35.3171, -31.5801, -3.14955, -14.2741, -12.5739, 69.331, -9.20565, 58.1378, 25.2995, 3.16657, 90.2573, 18.1653, -50.6101, -40.0071, -5.39718, -17.6579, 103.853, 2.34152, 73.9413, 15.7206, -10, 114.377, 31.8901, -72.4693, -37.3338, -10.1242, -3, -8.30505, -10, 55.7008, 48.6923, -0.34941, 88.6401, Policy 3 will be: 50.3334, -70.6215, 43.6836, 45, -10.3141, 42.8439, -6.67009, 108.77, -74.0927, -10, 122, 5.39204, -67.9152, -1.2195, 29.8686, -8.19658, -62.9921, -10, 25.8943, -51.1385, 4.10185, 119.501, 40.9591, -31.1092, -10.122, -15, -33.3741, -16.1793, -2.39876, 59.2666, -25.96, -6.23719, 117.894, 36.2335, -38.4624, -16.3274, -11.6152, -14.4055, 75.9568, -10, 72.2452, 39.4142, 17.2594, 87.105, 25.7569, -42.7752, -10.831, 1.09198, -14.5753, 106.133, 10.0987, 73.9915, 2.40298, -3.81762, 117.648, 37.915, -61.633, -37.6027, -7.43161, -4.66291, 11.9756, 1.58671, 44.9186, 21.7939, -10, 93.8403, Policy 4 will be: 48.0494, -68.7305, 40.4722, 45, -8.01117, 14.0587, -3.59312, 105.837, -58.6759, -2.07506, 117.5, 1.48348, -74.53, -5.51189, 29.0682, -3, -31.2501, -7.85237, 30.7176, -47.6885, -6.71875, 111.474, 49.8165, -31.9819, -6.39345, -9.54014, -37.1842, -37.823, -10, 37.4816, -56.1782, -6.4371, 100.44, 29.259, -42.4244, 1.98363, -4.39138, -13.4799, 32.5599, 3.63475, 55.5757, 20.9572, 2.04129, 90.9458, 29.6505, -51.5629, -13.7502, -6.11707, -15.6186, 107.671, -6.8327, 86.6544, -21.0652, -4.4663, 107.975, 47.1242, -71.0111, -14.7765, -1.74631, -6.07532, 21.2596, -10, 53.7876, 43.2992, -10, 76.2943, trial: 0, score: 5 trial: 1, score: 7 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 6 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 44.4834, -61.9913, 42.2436, 45, -13.1009, 11.0647, -1.21196, 103.501, -66.9435, -7.26168, 122, 6.62483, -75, 11.2017, 33.738, -6.8932, -45.1597, -10, 32.7912, -30.6848, -4.48807, 109.188, 45.2694, -35.381, -4.8239, -15, -29.8634, -26.7458, -8.97164, 42.164, -46.5424, -10, 103.451, 21.6695, -41.958, 9.84077, -6.90227, -8.86454, 32.6548, -0.453706, 55.2614, 22.7533, 11.4373, 74.8896, 29.6846, -47.3372, -21.9896, -7.48478, -8.34598, 91.7443, 5.74177, 94.6965, 9.88331, -2.41991, 95.1871, 46.5323, -69.1021, -37.9528, -8.14001, -3, -24.0262, -10, 39.0981, 41.8013, -6.78356, 97.2662, Average Score: 0 trial: 0, score: 6 trial: 1, score: 4 Policy 2: 39.4605, -67.0827, 44.7442, 45, -10.8968, 44.5049, -3.89155, 114.204, -66.2874, -10, 113.523, 4.95238, -71.6462, -9.566, 39.6631, -5.27707, -68.4363, -1.57724, 36.7558, -48.2608, -10, 106.386, 48.3844, -34.8987, -32.2786, -15, -32.3142, -35.372, -10, 47.7102, -61.1041, -6.35992, 96.6703, 35.3171, -31.5801, -3.14955, -14.2741, -12.5739, 69.331, -9.20565, 58.1378, 25.2995, 3.16657, 90.2573, 18.1653, -50.6101, -40.0071, -5.39718, -17.6579, 103.853, 2.34152, 73.9413, 15.7206, -10, 114.377, 31.8901, -72.4693, -37.3338, -10.1242, -3, -8.30505, -10, 55.7008, 48.6923, -0.34941, 88.6401, Average Score: 5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 50.3334, -70.6215, 43.6836, 45, -10.3141, 42.8439, -6.67009, 108.77, -74.0927, -10, 122, 5.39204, -67.9152, -1.2195, 29.8686, -8.19658, -62.9921, -10, 25.8943, -51.1385, 4.10185, 119.501, 40.9591, -31.1092, -10.122, -15, -33.3741, -16.1793, -2.39876, 59.2666, -25.96, -6.23719, 117.894, 36.2335, -38.4624, -16.3274, -11.6152, -14.4055, 75.9568, -10, 72.2452, 39.4142, 17.2594, 87.105, 25.7569, -42.7752, -10.831, 1.09198, -14.5753, 106.133, 10.0987, 73.9915, 2.40298, -3.81762, 117.648, 37.915, -61.633, -37.6027, -7.43161, -4.66291, 11.9756, 1.58671, 44.9186, 21.7939, -10, 93.8403, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 48.0494, -68.7305, 40.4722, 45, -8.01117, 14.0587, -3.59312, 105.837, -58.6759, -2.07506, 117.5, 1.48348, -74.53, -5.51189, 29.0682, -3, -31.2501, -7.85237, 30.7176, -47.6885, -6.71875, 111.474, 49.8165, -31.9819, -6.39345, -9.54014, -37.1842, -37.823, -10, 37.4816, -56.1782, -6.4371, 100.44, 29.259, -42.4244, 1.98363, -4.39138, -13.4799, 32.5599, 3.63475, 55.5757, 20.9572, 2.04129, 90.9458, 29.6505, -51.5629, -13.7502, -6.11707, -15.6186, 107.671, -6.8327, 86.6544, -21.0652, -4.4663, 107.975, 47.1242, -71.0111, -14.7765, -1.74631, -6.07532, 21.2596, -10, 53.7876, 43.2992, -10, 76.2943, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 6 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 38.9088, -73.0595, 27.5437, 45, -6.64845, 41.6173, -1.49458, 106.702, -105.782, -4.75013, 116.316, 19.1894, -75, 2.08425, 34.0337, -6.90868, -59.4085, -10, 21.2246, -39.0649, -5.88414, 106.575, 50.1805, -23.8103, -27.5724, -8.97878, -40.0071, -47.7798, -9.16311, 38.9202, -66.8979, -1.94708, 94.5216, 36.2216, -31.8047, -16.296, -12.9051, -8.1267, 29.0807, -9.43447, 66.6114, -7.6444, 16.4493, 85.6496, 18.0424, -44.0418, -25.9889, -8.27756, -12.9428, 123.752, 1.34852, 88.1912, -27.8488, -10, 102.773, 32.2884, -72.4798, -37.6533, -7.41762, -3.8315, -19.204, -5.8527, 50.9202, 25.5396, -10, 85.5548, Policy 2 will be: 43.6402, -60.2084, 46.807, 39.5911, -4.99126, 13.2221, -7.89826, 103.121, -66.3063, 0.094206, 119.348, 6.2564, -72.763, -1.98842, 31.1603, -8.65068, -62.3917, -10, 31.1121, -22.7041, -10, 108.21, 52.9045, -33.5391, 1.40872, -11.8898, -33.0109, -37.09, -8.67796, 53.2986, -42.0766, -1.83163, 100.866, 20.7635, -43.0209, 2.73686, -14.8055, -15.5457, 37.1494, -5.17303, 73.9509, 26.0731, 4.9805, 78.4882, 30.3511, -47.9337, -40.8368, 1.28185, -8.26941, 124.863, 1.54023, 82.749, 10.1561, -5.1758, 104.424, 40.3016, -68.3539, -30.8309, -10.5845, -7.70839, 12.4712, -10, 32.2844, 33.2977, -10, 99.2209, Policy 3 will be: 39.2905, -64.89, 58.1642, 45, -15.0775, 27.7518, -2.4769, 122, -98.3668, -9.31054, 111.625, 13.7097, -75, -3.5192, 39.8927, -3, -29.1972, -2.21534, 37.5022, -26.1349, -0.0919201, 110.733, 56.7146, -36.635, -8.04027, -15, -39.2966, -40.7405, -3.80763, 51.6446, -64.71, 3.49882, 110.453, 35.0847, -39.6492, 5.4428, -5.68793, -5.83533, 58.7751, -10, 78.038, 1.14776, 10.9263, 87.6858, 21.9735, -50.7986, -34.8882, -1.58992, -13.5645, 98.5352, 12.1414, 85.127, -26.355, -10, 111.032, 41.1224, -71.1802, -43.9351, -6.4947, -7.00764, -7.34359, -10, 39.2812, 54.8945, -10, 74.9711, Policy 4 will be: 36.4037, -72.287, 44.6401, 45, -7.45552, 22.7869, -6.50395, 114.664, -74.3559, 0.990473, 117.497, 19.8684, -72.4695, 11.594, 34.6194, -3, -51.04, -5.02624, 23.7967, -30.02, 1.13829, 114.35, 47.6419, -31.7631, -32.291, -11.8854, -35.8965, -18.1824, -10, 39.6676, -51.894, -5.20495, 110.53, 19.025, -37.8934, -6.57335, -9.84502, -11.3617, 43.7178, -8.47786, 55.2465, 15.3101, 6.04942, 87.3321, 21.87, -46.298, -30.3632, -6.94463, -8.00471, 93.733, 8.9422, 74.71, -22.0737, -10, 105.09, 33.0482, -64.5609, -22.4517, -2.48634, -9.4017, 1.83047, -10, 60.6223, 29.3877, -10, 92.3051, trial: 0, score: 8 trial: 1, score: 9 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 8.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 38.9088, -73.0595, 27.5437, 45, -6.64845, 41.6173, -1.49458, 106.702, -105.782, -4.75013, 116.316, 19.1894, -75, 2.08425, 34.0337, -6.90868, -59.4085, -10, 21.2246, -39.0649, -5.88414, 106.575, 50.1805, -23.8103, -27.5724, -8.97878, -40.0071, -47.7798, -9.16311, 38.9202, -66.8979, -1.94708, 94.5216, 36.2216, -31.8047, -16.296, -12.9051, -8.1267, 29.0807, -9.43447, 66.6114, -7.6444, 16.4493, 85.6496, 18.0424, -44.0418, -25.9889, -8.27756, -12.9428, 123.752, 1.34852, 88.1912, -27.8488, -10, 102.773, 32.2884, -72.4798, -37.6533, -7.41762, -3.8315, -19.204, -5.8527, 50.9202, 25.5396, -10, 85.5548, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 43.6402, -60.2084, 46.807, 39.5911, -4.99126, 13.2221, -7.89826, 103.121, -66.3063, 0.094206, 119.348, 6.2564, -72.763, -1.98842, 31.1603, -8.65068, -62.3917, -10, 31.1121, -22.7041, -10, 108.21, 52.9045, -33.5391, 1.40872, -11.8898, -33.0109, -37.09, -8.67796, 53.2986, -42.0766, -1.83163, 100.866, 20.7635, -43.0209, 2.73686, -14.8055, -15.5457, 37.1494, -5.17303, 73.9509, 26.0731, 4.9805, 78.4882, 30.3511, -47.9337, -40.8368, 1.28185, -8.26941, 124.863, 1.54023, 82.749, 10.1561, -5.1758, 104.424, 40.3016, -68.3539, -30.8309, -10.5845, -7.70839, 12.4712, -10, 32.2844, 33.2977, -10, 99.2209, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 39.2905, -64.89, 58.1642, 45, -15.0775, 27.7518, -2.4769, 122, -98.3668, -9.31054, 111.625, 13.7097, -75, -3.5192, 39.8927, -3, -29.1972, -2.21534, 37.5022, -26.1349, -0.0919201, 110.733, 56.7146, -36.635, -8.04027, -15, -39.2966, -40.7405, -3.80763, 51.6446, -64.71, 3.49882, 110.453, 35.0847, -39.6492, 5.4428, -5.68793, -5.83533, 58.7751, -10, 78.038, 1.14776, 10.9263, 87.6858, 21.9735, -50.7986, -34.8882, -1.58992, -13.5645, 98.5352, 12.1414, 85.127, -26.355, -10, 111.032, 41.1224, -71.1802, -43.9351, -6.4947, -7.00764, -7.34359, -10, 39.2812, 54.8945, -10, 74.9711, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 36.4037, -72.287, 44.6401, 45, -7.45552, 22.7869, -6.50395, 114.664, -74.3559, 0.990473, 117.497, 19.8684, -72.4695, 11.594, 34.6194, -3, -51.04, -5.02624, 23.7967, -30.02, 1.13829, 114.35, 47.6419, -31.7631, -32.291, -11.8854, -35.8965, -18.1824, -10, 39.6676, -51.894, -5.20495, 110.53, 19.025, -37.8934, -6.57335, -9.84502, -11.3617, 43.7178, -8.47786, 55.2465, 15.3101, 6.04942, 87.3321, 21.87, -46.298, -30.3632, -6.94463, -8.00471, 93.733, 8.9422, 74.71, -22.0737, -10, 105.09, 33.0482, -64.5609, -22.4517, -2.48634, -9.4017, 1.83047, -10, 60.6223, 29.3877, -10, 92.3051, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 8.5 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 47.0159, -62.2394, 46.7812, 38.8375, -15.0831, 32.4728, -10, 119.386, -98.8501, -7.38872, 116.945, 12.1861, -75, 6.04872, 32.267, -3, -68.4232, -10, 41.9998, -19.0579, 3.49727, 98.46, 55.6628, -25.8085, -15.0391, -13.567, -37.2087, -18.3909, -3.9891, 39.7371, -46.79, 3.67094, 119.309, 25.9115, -41.2286, 3.94774, -4.10546, -8.66469, 47.5364, -9.59999, 75.3272, 34.5248, 4.23006, 69.342, 29.6195, -46.7972, -19.2284, -8.24979, -9.64531, 105.871, -0.670551, 76.0346, -20.8006, 2.68812, 113.217, 35.8058, -62.5828, -10.2193, -9.48467, -3, 5.71489, 0.0864019, 43.6777, 14.4005, -10, 83.4209, Policy 2 will be: 46.2109, -64.019, 45.7109, 44.6823, -5.02342, 8.8791, -0.994736, 110.151, -65.6712, -8.26578, 116.649, 10.6338, -75, -9.12091, 32.4372, -4.81777, -49.4846, -7.84243, 33.9683, -43.6777, 4.86331, 97.8076, 51.3087, -26.6266, -4.45565, -15, -33.4083, -37.4004, -10, 62.2636, -54.9759, -10, 111.698, 30.7028, -36.9813, 11.3568, -6.54795, -13.0215, 30.7898, -4.69332, 67.7826, 18.4496, 11.9551, 72.8279, 29.6532, -40.986, -10.8982, -0.250395, -10.0321, 130, 1.32781, 75.3806, 4.22416, -9.37481, 102.054, 36.0088, -74.268, -19.5053, -6.79538, -9.96599, -1.90054, -10, 33.1373, 43.9371, -4.23674, 96.7122, Policy 3 will be: 53.4716, -66.7557, 29.9767, 44.1331, -4.96459, 24.3895, -3.88544, 121.479, -60.7117, -10, 122, 12.1088, -72.891, -16.7361, 39.0322, -3, -48.5546, -10, 44.3838, -21.6201, -4.18363, 97.1434, 57.9936, -36.6622, 2.03363, -15, -31.4389, -28.4625, -10, 61.2853, -67.3948, -10, 113.111, 36.0603, -37.1591, -6.3985, -7.45424, -5.08434, 39.3838, 1.38346, 72.1983, -1.8567, 8.95409, 86.9069, 22.6102, -37.8108, -14.294, -3.32481, -14.4242, 97.4597, 3.46986, 96.4924, -24.5442, -1.04277, 112.882, 32.7918, -65.5596, -14.8989, -11.0892, -12.5839, -5.28034, -10, 44.9845, 43.3109, -10, 87.4571, Policy 4 will be: 47.1944, -66.8532, 47.2846, 45, -7.96259, 44.4745, -10, 120.437, -104.649, -8.99687, 122, 12.7988, -75, -22.1399, 36.3769, -3, -23.6954, -1.94208, 41.8942, -56.9488, 1.89524, 116.157, 45.5064, -29.1355, -29.5675, -11.0137, -30.5924, -21.0269, -10, 56.5974, -55.6935, -2.02305, 108.359, 22.544, -30.4022, -15.9893, -4.20953, -9.04685, 70.3538, 3.85532, 67.3799, 37.8158, 16.032, 81.9492, 25.8343, -50.0729, -10.5014, -6.86771, -13.9827, 107.531, 8.90199, 73.5352, -12.2681, 1.59816, 96.9073, 46.9078, -73.16, -19.6475, -8.18349, -3, 14.3266, -3.7255, 35.1028, 28.2081, -10, 101.07, trial: 0, score: 6 trial: 1, score: 7 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 6.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 47.0159, -62.2394, 46.7812, 38.8375, -15.0831, 32.4728, -10, 119.386, -98.8501, -7.38872, 116.945, 12.1861, -75, 6.04872, 32.267, -3, -68.4232, -10, 41.9998, -19.0579, 3.49727, 98.46, 55.6628, -25.8085, -15.0391, -13.567, -37.2087, -18.3909, -3.9891, 39.7371, -46.79, 3.67094, 119.309, 25.9115, -41.2286, 3.94774, -4.10546, -8.66469, 47.5364, -9.59999, 75.3272, 34.5248, 4.23006, 69.342, 29.6195, -46.7972, -19.2284, -8.24979, -9.64531, 105.871, -0.670551, 76.0346, -20.8006, 2.68812, 113.217, 35.8058, -62.5828, -10.2193, -9.48467, -3, 5.71489, 0.0864019, 43.6777, 14.4005, -10, 83.4209, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 46.2109, -64.019, 45.7109, 44.6823, -5.02342, 8.8791, -0.994736, 110.151, -65.6712, -8.26578, 116.649, 10.6338, -75, -9.12091, 32.4372, -4.81777, -49.4846, -7.84243, 33.9683, -43.6777, 4.86331, 97.8076, 51.3087, -26.6266, -4.45565, -15, -33.4083, -37.4004, -10, 62.2636, -54.9759, -10, 111.698, 30.7028, -36.9813, 11.3568, -6.54795, -13.0215, 30.7898, -4.69332, 67.7826, 18.4496, 11.9551, 72.8279, 29.6532, -40.986, -10.8982, -0.250395, -10.0321, 130, 1.32781, 75.3806, 4.22416, -9.37481, 102.054, 36.0088, -74.268, -19.5053, -6.79538, -9.96599, -1.90054, -10, 33.1373, 43.9371, -4.23674, 96.7122, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 53.4716, -66.7557, 29.9767, 44.1331, -4.96459, 24.3895, -3.88544, 121.479, -60.7117, -10, 122, 12.1088, -72.891, -16.7361, 39.0322, -3, -48.5546, -10, 44.3838, -21.6201, -4.18363, 97.1434, 57.9936, -36.6622, 2.03363, -15, -31.4389, -28.4625, -10, 61.2853, -67.3948, -10, 113.111, 36.0603, -37.1591, -6.3985, -7.45424, -5.08434, 39.3838, 1.38346, 72.1983, -1.8567, 8.95409, 86.9069, 22.6102, -37.8108, -14.294, -3.32481, -14.4242, 97.4597, 3.46986, 96.4924, -24.5442, -1.04277, 112.882, 32.7918, -65.5596, -14.8989, -11.0892, -12.5839, -5.28034, -10, 44.9845, 43.3109, -10, 87.4571, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 47.1944, -66.8532, 47.2846, 45, -7.96259, 44.4745, -10, 120.437, -104.649, -8.99687, 122, 12.7988, -75, -22.1399, 36.3769, -3, -23.6954, -1.94208, 41.8942, -56.9488, 1.89524, 116.157, 45.5064, -29.1355, -29.5675, -11.0137, -30.5924, -21.0269, -10, 56.5974, -55.6935, -2.02305, 108.359, 22.544, -30.4022, -15.9893, -4.20953, -9.04685, 70.3538, 3.85532, 67.3799, 37.8158, 16.032, 81.9492, 25.8343, -50.0729, -10.5014, -6.86771, -13.9827, 107.531, 8.90199, 73.5352, -12.2681, 1.59816, 96.9073, 46.9078, -73.16, -19.6475, -8.18349, -3, 14.3266, -3.7255, 35.1028, 28.2081, -10, 101.07, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 6.5 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 46.8952, -68.8956, 33.4938, 43.5019, -4.11193, 25.8328, -10, 105.659, -98.4314, -7.40007, 114.569, 1.61258, -75, 7.61286, 35.1839, -6.62761, -26.3003, -10, 21.066, -19.3565, 0.368389, 114.314, 53.7528, -28.5565, -29.4481, -14.1068, -30.8419, -20.5387, -2.15645, 60.2469, -51.0945, -7.9509, 111.934, 34.4701, -30.8926, -13.9255, -13.2001, -10.9337, 72.2561, 3.18778, 62.9602, 32.9562, 7.78136, 75.0168, 21.8722, -46.3223, -11.0745, 1.32411, -13.3892, 93.466, 5.68011, 78.1803, 8.69664, -0.076694, 115.35, 32.8367, -71.9824, -35.658, -12.2212, -6.49371, 12.1884, -10, 47.9073, 34.9619, -9.83075, 90.2209, Policy 2 will be: 54.8432, -70.3097, 57.6593, 40.7079, -8.02093, 45.4429, -10, 122, -68.0448, -1.18097, 122, 11.8261, -73.6817, -17.773, 39.7003, -8.56699, -39.5219, -5.96408, 41.5618, -22.7547, -3.06911, 103.89, 60.0236, -29.2699, -32.2163, -15, -38.6895, -42.8702, -2.68205, 59.8849, -54.48, 3.71091, 116.229, 23.0977, -35.6333, -5.40142, -4.1749, -11.1431, 66.361, -10, 64.6559, 17.7407, 16.7086, 83.6413, 26.4136, -43.2311, -25.6824, 0.819751, -6.57607, 126.127, -1.50982, 73.0998, -30.7272, -5.17711, 92.9911, 35.541, -62.8031, -19.3853, -8.5496, -9.05842, 19.8858, -10, 45.6004, 55.4999, -7.12807, 82.0478, Policy 3 will be: 52.9573, -68.4745, 30.7709, 45, -5.14922, 33.2851, -10, 107.342, -64.2595, -0.0439014, 108.161, 6.27353, -75, -18.0873, 37.0888, -3.60584, -58.9775, -2.01252, 35.5248, -25.2225, -0.394912, 121.707, 57.2569, -34.4965, -5.57017, -9.13949, -30.9313, -46.6941, -4.9576, 47.5128, -30.0604, -0.156584, 93.9708, 29.9115, -37.553, -4.12146, -12.6924, -4.40345, 58.0899, 2.18149, 79.718, 32.6424, 20.3939, 82.0665, 25.6095, -44.3109, -32.8472, 2.08678, -16.1292, 128.53, -0.263942, 75.7743, -25.0022, 3.55257, 93.3969, 42.4221, -66.0531, -25.431, -11.862, -4.98977, -17.6382, -10, 57.6407, 31.5648, -5.80128, 83.4186, Policy 4 will be: 41.713, -59.7754, 51.1414, 45, -6.99722, 21.6982, -3.07115, 98.4409, -85.3474, -8.05772, 120.307, 5.42397, -75, -13.3189, 39.7286, -6.14319, -41.5345, -10, 20.7441, -40.8613, -0.0863415, 115.513, 41.1467, -35.8823, -12.979, -13.9606, -31.7051, -16.7619, -10, 38.0077, -55.363, -5.46768, 107.72, 36.9891, -33.5487, -11.7046, -8.4188, -9.45746, 30.6928, -0.577794, 71.9287, 20.8033, 12.4576, 71.7113, 22.7673, -37.7767, -39.2251, -5.74853, -13.6363, 86.5604, 12.3652, 79.5385, -28.0083, -0.822387, 109.429, 47.4321, -65.7808, -12.7158, -8.7777, -3, 3.81088, -9.31097, 57.0782, 44.6492, -1.83798, 101.339, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 6 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 46.8952, -68.8956, 33.4938, 43.5019, -4.11193, 25.8328, -10, 105.659, -98.4314, -7.40007, 114.569, 1.61258, -75, 7.61286, 35.1839, -6.62761, -26.3003, -10, 21.066, -19.3565, 0.368389, 114.314, 53.7528, -28.5565, -29.4481, -14.1068, -30.8419, -20.5387, -2.15645, 60.2469, -51.0945, -7.9509, 111.934, 34.4701, -30.8926, -13.9255, -13.2001, -10.9337, 72.2561, 3.18778, 62.9602, 32.9562, 7.78136, 75.0168, 21.8722, -46.3223, -11.0745, 1.32411, -13.3892, 93.466, 5.68011, 78.1803, 8.69664, -0.076694, 115.35, 32.8367, -71.9824, -35.658, -12.2212, -6.49371, 12.1884, -10, 47.9073, 34.9619, -9.83075, 90.2209, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 54.8432, -70.3097, 57.6593, 40.7079, -8.02093, 45.4429, -10, 122, -68.0448, -1.18097, 122, 11.8261, -73.6817, -17.773, 39.7003, -8.56699, -39.5219, -5.96408, 41.5618, -22.7547, -3.06911, 103.89, 60.0236, -29.2699, -32.2163, -15, -38.6895, -42.8702, -2.68205, 59.8849, -54.48, 3.71091, 116.229, 23.0977, -35.6333, -5.40142, -4.1749, -11.1431, 66.361, -10, 64.6559, 17.7407, 16.7086, 83.6413, 26.4136, -43.2311, -25.6824, 0.819751, -6.57607, 126.127, -1.50982, 73.0998, -30.7272, -5.17711, 92.9911, 35.541, -62.8031, -19.3853, -8.5496, -9.05842, 19.8858, -10, 45.6004, 55.4999, -7.12807, 82.0478, Average Score: 0 trial: 0, score: 6 trial: 1, score: 0 Policy 3: 52.9573, -68.4745, 30.7709, 45, -5.14922, 33.2851, -10, 107.342, -64.2595, -0.0439014, 108.161, 6.27353, -75, -18.0873, 37.0888, -3.60584, -58.9775, -2.01252, 35.5248, -25.2225, -0.394912, 121.707, 57.2569, -34.4965, -5.57017, -9.13949, -30.9313, -46.6941, -4.9576, 47.5128, -30.0604, -0.156584, 93.9708, 29.9115, -37.553, -4.12146, -12.6924, -4.40345, 58.0899, 2.18149, 79.718, 32.6424, 20.3939, 82.0665, 25.6095, -44.3109, -32.8472, 2.08678, -16.1292, 128.53, -0.263942, 75.7743, -25.0022, 3.55257, 93.3969, 42.4221, -66.0531, -25.431, -11.862, -4.98977, -17.6382, -10, 57.6407, 31.5648, -5.80128, 83.4186, Average Score: 3 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 41.713, -59.7754, 51.1414, 45, -6.99722, 21.6982, -3.07115, 98.4409, -85.3474, -8.05772, 120.307, 5.42397, -75, -13.3189, 39.7286, -6.14319, -41.5345, -10, 20.7441, -40.8613, -0.0863415, 115.513, 41.1467, -35.8823, -12.979, -13.9606, -31.7051, -16.7619, -10, 38.0077, -55.363, -5.46768, 107.72, 36.9891, -33.5487, -11.7046, -8.4188, -9.45746, 30.6928, -0.577794, 71.9287, 20.8033, 12.4576, 71.7113, 22.7673, -37.7767, -39.2251, -5.74853, -13.6363, 86.5604, 12.3652, 79.5385, -28.0083, -0.822387, 109.429, 47.4321, -65.7808, -12.7158, -8.7777, -3, 3.81088, -9.31097, 57.0782, 44.6492, -1.83798, 101.339, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 6 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 50.3136, -72.1228, 39.3474, 45, -9.25126, 19.5995, -3.1005, 113.969, -65.5454, -10, 114.596, 13.4232, -72.0972, 0.248901, 36.5926, -3, -23.3382, -6.32024, 24.5712, -44.6252, -9.71326, 99.8455, 43.2025, -36.6484, -5.26464, -11.7924, -34.6287, -2.15201, -10, 42.9578, -61.6276, 1.87991, 114.814, 18.1461, -38.9464, -17.3546, -15, -13.0123, 66.7365, -3.10035, 60.0649, 32.4711, 14.0666, 85.4447, 22.2937, -46.7782, -11.28, 1.49833, -9.40348, 102.6, 5.54244, 94.7668, -19.9142, -7.28147, 105.419, 35.3321, -71.78, -38.0342, -7.11605, -3.68688, -0.340952, -3.9712, 51.74, 28.7715, -10, 83.8565, Policy 2 will be: 54.5639, -67.8397, 40.1676, 38.9663, -5.27738, 37.882, -3.84954, 108.557, -101.993, -10, 122, 10.4091, -72.1137, -20.6481, 39.5742, -6.19808, -53.1324, -10, 44.9397, -16.3091, 2.99757, 99.0683, 46.1815, -34.9458, -30.2302, -11.9499, -41.1213, -11.9813, -6.25499, 37.6041, -63.6617, -1.87737, 91.7623, 25.5697, -36.0064, -1.37179, -5.62116, -10.2417, 73.0946, -9.56899, 62.4988, -7.27427, 4.47716, 96.6183, 33.6923, -42.3395, -39.5406, -3.76636, -8.62326, 113.802, 2.02912, 74.5495, -5.57688, -10, 110.735, 36.6777, -62.7515, -19.5813, -10.1176, -11.2267, 12.2604, -10, 35.578, 37.6154, -4.34186, 80.9015, Policy 3 will be: 55.4033, -62.9243, 48.8577, 43.7062, -4.57924, 22.8958, -9.0852, 113.039, -96.5457, -3.19675, 122, 0.485172, -68.565, 4.58162, 40.4926, -5.75405, -63.9777, -5.30459, 17.6726, -25.9305, -10, 113.144, 56.1211, -30.4142, -30.1525, -14.7308, -35.7844, -9.9029, -8.61306, 51.2299, -70.8794, -10, 96.4841, 27.4415, -36.1829, -11.7869, -3.99988, -8.3715, 75.3731, 1.42872, 81.0332, 16.2546, 2.48773, 70.34, 32.8343, -38.7746, -8.918, -5.74903, -6.64013, 100.328, -5.86037, 83.6883, -11.4452, -8.13461, 113.786, 46.2957, -63.191, -16.5283, -11.0851, -12.1518, 13.0567, -10, 36.1808, 9.84305, -10, 82.9321, Policy 4 will be: 45.646, -62.5931, 26.5577, 45, -3.67216, 14.0591, -9.74891, 113.655, -58.7895, -10, 111.272, 1.87547, -75, 6.27767, 34.6783, -6.19484, -48.6711, -10, 37.8652, -51.0825, -3.71977, 107.04, 55.2455, -23.539, -18.6069, -15, -40.3217, -2.65772, -3.44157, 52.2291, -22.9384, -10, 94.5093, 20.6794, -31.3211, 1.76565, -9.34796, -12.4045, 29.9957, -9.75886, 65.4149, 38.8073, 20.083, 87.853, 20.296, -41.6113, -39.3855, -6.80127, -14.9244, 114.202, 0.0795076, 85.3933, -23.4729, -10, 115.395, 51.1263, -63.0706, -12.6763, -11.4606, -5.78882, 17.2128, -3.34242, 56.5105, 14.3574, -10, 72.6687, trial: 0, score: 6 trial: 1, score: 5 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 5.5 trial: 0, score: 3 trial: 1, score: 0 Policy 1: 50.3136, -72.1228, 39.3474, 45, -9.25126, 19.5995, -3.1005, 113.969, -65.5454, -10, 114.596, 13.4232, -72.0972, 0.248901, 36.5926, -3, -23.3382, -6.32024, 24.5712, -44.6252, -9.71326, 99.8455, 43.2025, -36.6484, -5.26464, -11.7924, -34.6287, -2.15201, -10, 42.9578, -61.6276, 1.87991, 114.814, 18.1461, -38.9464, -17.3546, -15, -13.0123, 66.7365, -3.10035, 60.0649, 32.4711, 14.0666, 85.4447, 22.2937, -46.7782, -11.28, 1.49833, -9.40348, 102.6, 5.54244, 94.7668, -19.9142, -7.28147, 105.419, 35.3321, -71.78, -38.0342, -7.11605, -3.68688, -0.340952, -3.9712, 51.74, 28.7715, -10, 83.8565, Average Score: 1.5 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 54.5639, -67.8397, 40.1676, 38.9663, -5.27738, 37.882, -3.84954, 108.557, -101.993, -10, 122, 10.4091, -72.1137, -20.6481, 39.5742, -6.19808, -53.1324, -10, 44.9397, -16.3091, 2.99757, 99.0683, 46.1815, -34.9458, -30.2302, -11.9499, -41.1213, -11.9813, -6.25499, 37.6041, -63.6617, -1.87737, 91.7623, 25.5697, -36.0064, -1.37179, -5.62116, -10.2417, 73.0946, -9.56899, 62.4988, -7.27427, 4.47716, 96.6183, 33.6923, -42.3395, -39.5406, -3.76636, -8.62326, 113.802, 2.02912, 74.5495, -5.57688, -10, 110.735, 36.6777, -62.7515, -19.5813, -10.1176, -11.2267, 12.2604, -10, 35.578, 37.6154, -4.34186, 80.9015, Average Score: 0 trial: 0, score: 0 trial: 1, score: 2 Policy 3: 55.4033, -62.9243, 48.8577, 43.7062, -4.57924, 22.8958, -9.0852, 113.039, -96.5457, -3.19675, 122, 0.485172, -68.565, 4.58162, 40.4926, -5.75405, -63.9777, -5.30459, 17.6726, -25.9305, -10, 113.144, 56.1211, -30.4142, -30.1525, -14.7308, -35.7844, -9.9029, -8.61306, 51.2299, -70.8794, -10, 96.4841, 27.4415, -36.1829, -11.7869, -3.99988, -8.3715, 75.3731, 1.42872, 81.0332, 16.2546, 2.48773, 70.34, 32.8343, -38.7746, -8.918, -5.74903, -6.64013, 100.328, -5.86037, 83.6883, -11.4452, -8.13461, 113.786, 46.2957, -63.191, -16.5283, -11.0851, -12.1518, 13.0567, -10, 36.1808, 9.84305, -10, 82.9321, Average Score: 1 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 45.646, -62.5931, 26.5577, 45, -3.67216, 14.0591, -9.74891, 113.655, -58.7895, -10, 111.272, 1.87547, -75, 6.27767, 34.6783, -6.19484, -48.6711, -10, 37.8652, -51.0825, -3.71977, 107.04, 55.2455, -23.539, -18.6069, -15, -40.3217, -2.65772, -3.44157, 52.2291, -22.9384, -10, 94.5093, 20.6794, -31.3211, 1.76565, -9.34796, -12.4045, 29.9957, -9.75886, 65.4149, 38.8073, 20.083, 87.853, 20.296, -41.6113, -39.3855, -6.80127, -14.9244, 114.202, 0.0795076, 85.3933, -23.4729, -10, 115.395, 51.1263, -63.0706, -12.6763, -11.4606, -5.78882, 17.2128, -3.34242, 56.5105, 14.3574, -10, 72.6687, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 5.5 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 39.6621, -73.2674, 27.2066, 45, -4.98359, 48.4127, -10, 110.588, -75.5937, -10, 122, 3.82958, -68.7416, -8.46463, 37.4609, -7.29557, -57.9811, -10, 18.8456, -43.4986, -2.77893, 118.083, 50.0637, -37.8989, -29.6885, -8.0615, -32.771, -6.11016, -10, 34.4967, -26.6551, -6.53596, 103.472, 33.1966, -33.479, -4.59818, -13.9099, -9.21848, 46.3723, 0.557529, 62.653, -2.29243, 16.2911, 81.1728, 17.28, -48.916, -36.2068, -2.6056, -10.8475, 88.3878, 3.05529, 89.2176, -7.31256, 2.21244, 102.553, 33.3478, -63.5264, -24.8868, -1.49297, -12.6843, -23.67, -10, 58.1418, 15.2446, -2.2784, 84.46, Policy 2 will be: 40.7548, -62.5549, 47.8673, 45, -9.77496, 44.1764, -10, 103.919, -85.9197, -10, 122, 9.93607, -74.2523, -7.76149, 36.3238, -3, -35.206, -10, 39.8151, -40.3912, -8.34586, 121.957, 43.0869, -32.9883, -13.3034, -15, -30.5036, -15.0537, -2.01975, 33.7524, -67.6625, -10, 93.7639, 23.2675, -36.1497, 7.26247, -11.9808, -5.46627, 53.8038, 1.51739, 75.6855, 35.3633, 5.79411, 80.3498, 23.2839, -38.982, -15.0284, -4.9886, -17.9762, 118.559, 3.08193, 88.2715, -7.19274, -5.0473, 106.197, 45.2173, -65.9264, -34.6649, -2.93193, -6.82823, 8.35213, -10, 54.4464, 9.02057, -10, 74.4663, Policy 3 will be: 38.5063, -72.7096, 51.5257, 45, -6.49451, 13.7268, -10, 122, -67.6316, -10, 121.535, 7.47974, -72.5201, -1.42118, 35.318, -3, -56.3601, -10, 18.2966, -15.9436, -0.425134, 105.986, 56.964, -36.3771, -31.7251, -8.65502, -37.2004, -34.0166, -10, 51.0489, -25.3318, -5.76456, 118.277, 25.4558, -40.8154, 10.3683, -4.62155, -9.70807, 47.9168, -0.200485, 79.8858, 18.2256, 5.31614, 75.8397, 30.5388, -43.2279, -20.5866, -7.02874, -7.35948, 124.568, -1.31462, 90.4747, -7.47762, -4.50603, 102.751, 38.4639, -65.9269, -15.2972, -12.5714, -12.8477, -11.3146, -7.3176, 37.655, 15.079, -5.34005, 95.0348, Policy 4 will be: 46.9173, -61.9535, 43.4216, 45, -6.8127, 48.6234, -7.91874, 121.019, -88.3958, -5.82579, 122, 6.22304, -72.6356, -21.3168, 32.5649, -3, -43.7591, -10, 24.5185, -16.2608, -10, 121.724, 42.7118, -37.1359, -27.9292, -15, -31.1683, -2.81259, -3.71866, 61.9995, -55.7665, -0.49916, 99.1157, 20.3697, -36.756, -14.9193, -3.39501, -8.67741, 63.3867, -7.89312, 62.915, 19.1534, 4.68183, 70.182, 32.0215, -44.846, -29.2404, -2.41225, -9.63893, 91.0129, -5.63592, 76.268, 0.669119, -10, 105.927, 38.4385, -75, -16.2888, -3.3275, -5.80992, 10.1723, 0.895788, 38.9241, 38.5188, -10, 88.87, trial: 0, score: 7 trial: 1, score: 8 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 7.5 trial: 0, score: 2 trial: 1, score: 2 Policy 1: 39.6621, -73.2674, 27.2066, 45, -4.98359, 48.4127, -10, 110.588, -75.5937, -10, 122, 3.82958, -68.7416, -8.46463, 37.4609, -7.29557, -57.9811, -10, 18.8456, -43.4986, -2.77893, 118.083, 50.0637, -37.8989, -29.6885, -8.0615, -32.771, -6.11016, -10, 34.4967, -26.6551, -6.53596, 103.472, 33.1966, -33.479, -4.59818, -13.9099, -9.21848, 46.3723, 0.557529, 62.653, -2.29243, 16.2911, 81.1728, 17.28, -48.916, -36.2068, -2.6056, -10.8475, 88.3878, 3.05529, 89.2176, -7.31256, 2.21244, 102.553, 33.3478, -63.5264, -24.8868, -1.49297, -12.6843, -23.67, -10, 58.1418, 15.2446, -2.2784, 84.46, Average Score: 2 trial: 0, score: 5 trial: 1, score: 3 Policy 2: 40.7548, -62.5549, 47.8673, 45, -9.77496, 44.1764, -10, 103.919, -85.9197, -10, 122, 9.93607, -74.2523, -7.76149, 36.3238, -3, -35.206, -10, 39.8151, -40.3912, -8.34586, 121.957, 43.0869, -32.9883, -13.3034, -15, -30.5036, -15.0537, -2.01975, 33.7524, -67.6625, -10, 93.7639, 23.2675, -36.1497, 7.26247, -11.9808, -5.46627, 53.8038, 1.51739, 75.6855, 35.3633, 5.79411, 80.3498, 23.2839, -38.982, -15.0284, -4.9886, -17.9762, 118.559, 3.08193, 88.2715, -7.19274, -5.0473, 106.197, 45.2173, -65.9264, -34.6649, -2.93193, -6.82823, 8.35213, -10, 54.4464, 9.02057, -10, 74.4663, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 38.5063, -72.7096, 51.5257, 45, -6.49451, 13.7268, -10, 122, -67.6316, -10, 121.535, 7.47974, -72.5201, -1.42118, 35.318, -3, -56.3601, -10, 18.2966, -15.9436, -0.425134, 105.986, 56.964, -36.3771, -31.7251, -8.65502, -37.2004, -34.0166, -10, 51.0489, -25.3318, -5.76456, 118.277, 25.4558, -40.8154, 10.3683, -4.62155, -9.70807, 47.9168, -0.200485, 79.8858, 18.2256, 5.31614, 75.8397, 30.5388, -43.2279, -20.5866, -7.02874, -7.35948, 124.568, -1.31462, 90.4747, -7.47762, -4.50603, 102.751, 38.4639, -65.9269, -15.2972, -12.5714, -12.8477, -11.3146, -7.3176, 37.655, 15.079, -5.34005, 95.0348, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 46.9173, -61.9535, 43.4216, 45, -6.8127, 48.6234, -7.91874, 121.019, -88.3958, -5.82579, 122, 6.22304, -72.6356, -21.3168, 32.5649, -3, -43.7591, -10, 24.5185, -16.2608, -10, 121.724, 42.7118, -37.1359, -27.9292, -15, -31.1683, -2.81259, -3.71866, 61.9995, -55.7665, -0.49916, 99.1157, 20.3697, -36.756, -14.9193, -3.39501, -8.67741, 63.3867, -7.89312, 62.915, 19.1534, 4.68183, 70.182, 32.0215, -44.846, -29.2404, -2.41225, -9.63893, 91.0129, -5.63592, 76.268, 0.669119, -10, 105.927, 38.4385, -75, -16.2888, -3.3275, -5.80992, 10.1723, 0.895788, 38.9241, 38.5188, -10, 88.87, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 7.5 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 37.038, -67.2527, 44.487, 43.3848, -14.533, 48.7029, -10, 121.057, -73.5088, -10, 107.565, 11.1803, -75, -6.61351, 33.0856, -3, -35.5735, -10, 39.5762, -20.991, -10, 96.9058, 48.2299, -31.2027, -26.8981, -15, -31.7504, -25.6035, -10, 52.8733, -66.4472, -10, 100.012, 35.0898, -42.0494, 5.17177, -11.7831, -10.3292, 73.5544, 1.39466, 74.8127, 4.1071, 6.06581, 77.6599, 24.6169, -37.276, -34.7527, -0.73976, -8.51666, 115.289, 0.765582, 77.7178, -1.9076, -2.19827, 94.2017, 41.7421, -62.2468, -44.0762, -4.44451, -7.5622, 21.999, -1.20634, 54.5019, 24.8134, -1.30675, 80.4926, Policy 2 will be: 43.2098, -65.2279, 32.622, 45, -7.00603, 41.5053, -10, 111.776, -76.0512, -10, 117.059, 11.667, -74.0576, -12.3477, 37.7958, -3, -37.0704, -7.24173, 44.4259, -24.5422, -10, 104.94, 59.7194, -35.6985, -0.361808, -11.6238, -39.3101, -28.3078, -10, 57.5311, -25.1093, -5.41345, 108.806, 17.7856, -39.0922, -14.6263, -7.63185, -10.3016, 28.9999, 3.11021, 76.6123, 29.0026, 10.7095, 90.8796, 16.6289, -45.2481, -29.187, -0.225358, -8.44469, 130, 10.1548, 87.6247, -18.4697, -4.40857, 106.47, 50.6576, -66.458, -31.0584, -12.691, -10.0745, 20.802, 0.46701, 48.9361, 40.7567, -6.59054, 94.1334, Policy 3 will be: 48.143, -71.5664, 54.8368, 43.0079, -13.6183, 21.8264, -10, 118.196, -71.4331, -10, 122, 5.19374, -75, 11.4341, 30.995, -7.03624, -22.8608, -3.91294, 16.8412, -19.9301, 3.59977, 122, 43.6431, -37.2612, -5.56364, -15, -35.9599, -24.5139, -4.19367, 55.0991, -38.5266, -7.0229, 105.755, 19.9429, -31.4176, -11.5896, -8.6698, -11.9415, 42.0087, -8.07304, 82.7042, 0.552908, 14.4692, 70.7263, 18.0084, -51.9786, -14.9686, -7.89042, -12.4555, 90.0644, -0.510663, 72.3019, -12.8125, -10, 120.743, 51.2, -64.3473, -34.8213, -2.62679, -8.41965, 22.244, -10, 54.2113, 41.6933, -9.73674, 75.0597, Policy 4 will be: 39.4914, -61.2524, 40.3266, 43.5401, -11.3293, 28.0279, -10, 104.691, -94.0829, -10, 122, 11.6934, -68.7109, -17.4998, 39.4167, -4.66179, -27.9021, -4.20293, 43.5242, -50.6607, -1.28482, 109.146, 50.3264, -33.2695, -20.4223, -12.5241, -36.3272, -6.08397, -1.83366, 47.081, -65.3789, -2.158, 96.9296, 26.0075, -43.3656, 11.8603, -13.4866, -11.4704, 70.6941, 1.75844, 73.7876, 14.8776, 18.4007, 69.5104, 29.0786, -43.6663, -30.3193, -1.51377, -11.8682, 125.925, -5.21349, 87.5505, -20.2038, -9.67281, 103.498, 33.6233, -65.6342, -41.3817, -1.87782, -12.6392, -17.0708, -10, 47.4086, 26.0336, -10, 89.2326, trial: 0, score: 6 trial: 1, score: 7 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 6.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 37.038, -67.2527, 44.487, 43.3848, -14.533, 48.7029, -10, 121.057, -73.5088, -10, 107.565, 11.1803, -75, -6.61351, 33.0856, -3, -35.5735, -10, 39.5762, -20.991, -10, 96.9058, 48.2299, -31.2027, -26.8981, -15, -31.7504, -25.6035, -10, 52.8733, -66.4472, -10, 100.012, 35.0898, -42.0494, 5.17177, -11.7831, -10.3292, 73.5544, 1.39466, 74.8127, 4.1071, 6.06581, 77.6599, 24.6169, -37.276, -34.7527, -0.73976, -8.51666, 115.289, 0.765582, 77.7178, -1.9076, -2.19827, 94.2017, 41.7421, -62.2468, -44.0762, -4.44451, -7.5622, 21.999, -1.20634, 54.5019, 24.8134, -1.30675, 80.4926, Average Score: 0 trial: 0, score: 0 trial: 1, score: 2 Policy 2: 43.2098, -65.2279, 32.622, 45, -7.00603, 41.5053, -10, 111.776, -76.0512, -10, 117.059, 11.667, -74.0576, -12.3477, 37.7958, -3, -37.0704, -7.24173, 44.4259, -24.5422, -10, 104.94, 59.7194, -35.6985, -0.361808, -11.6238, -39.3101, -28.3078, -10, 57.5311, -25.1093, -5.41345, 108.806, 17.7856, -39.0922, -14.6263, -7.63185, -10.3016, 28.9999, 3.11021, 76.6123, 29.0026, 10.7095, 90.8796, 16.6289, -45.2481, -29.187, -0.225358, -8.44469, 130, 10.1548, 87.6247, -18.4697, -4.40857, 106.47, 50.6576, -66.458, -31.0584, -12.691, -10.0745, 20.802, 0.46701, 48.9361, 40.7567, -6.59054, 94.1334, Average Score: 1 trial: 0, score: 2 trial: 1, score: 3 Policy 3: 48.143, -71.5664, 54.8368, 43.0079, -13.6183, 21.8264, -10, 118.196, -71.4331, -10, 122, 5.19374, -75, 11.4341, 30.995, -7.03624, -22.8608, -3.91294, 16.8412, -19.9301, 3.59977, 122, 43.6431, -37.2612, -5.56364, -15, -35.9599, -24.5139, -4.19367, 55.0991, -38.5266, -7.0229, 105.755, 19.9429, -31.4176, -11.5896, -8.6698, -11.9415, 42.0087, -8.07304, 82.7042, 0.552908, 14.4692, 70.7263, 18.0084, -51.9786, -14.9686, -7.89042, -12.4555, 90.0644, -0.510663, 72.3019, -12.8125, -10, 120.743, 51.2, -64.3473, -34.8213, -2.62679, -8.41965, 22.244, -10, 54.2113, 41.6933, -9.73674, 75.0597, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 39.4914, -61.2524, 40.3266, 43.5401, -11.3293, 28.0279, -10, 104.691, -94.0829, -10, 122, 11.6934, -68.7109, -17.4998, 39.4167, -4.66179, -27.9021, -4.20293, 43.5242, -50.6607, -1.28482, 109.146, 50.3264, -33.2695, -20.4223, -12.5241, -36.3272, -6.08397, -1.83366, 47.081, -65.3789, -2.158, 96.9296, 26.0075, -43.3656, 11.8603, -13.4866, -11.4704, 70.6941, 1.75844, 73.7876, 14.8776, 18.4007, 69.5104, 29.0786, -43.6663, -30.3193, -1.51377, -11.8682, 125.925, -5.21349, 87.5505, -20.2038, -9.67281, 103.498, 33.6233, -65.6342, -41.3817, -1.87782, -12.6392, -17.0708, -10, 47.4086, 26.0336, -10, 89.2326, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Current Best Policy Score: 6.5 Policy 0 will be: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Policy 1 will be: 47.6833, -60.9224, 33.1986, 45, -8.11415, 30.155, -10, 114.171, -73.0672, -8.19966, 122, 15.2712, -68.0628, -1.41273, 39.2884, -3, -46.284, -4.55977, 30.1138, -28.9885, -8.39214, 95.3857, 42.5082, -31.4579, -2.05385, -15, -35.2197, -10.5643, -4.41705, 46.6649, -49.9369, -2.28316, 97.0553, 30.9929, -41.9987, 1.28711, -5.47896, -13.9374, 40.0262, -5.34869, 63.0207, -2.22282, 15.9162, 72.3204, 21.1993, -37.4079, -19.508, 1.26451, -11.9131, 116.857, 2.50694, 94.444, -1.51795, -10, 114.131, 45.9152, -71.0956, -42.2227, -3.27657, -10.8321, -3.58056, -6.57094, 31.4022, 23.3472, -4.73843, 77.1617, Policy 2 will be: 51.386, -70.055, 29.1653, 45, -9.91107, 49.7695, -2.39206, 118.585, -57.3304, -6.64031, 109.214, 12.7694, -75, -2.00889, 33.1026, -3, -51.4548, -10, 34.9571, -41.6483, -2.2086, 99.3573, 56.1282, -35.0164, -9.64023, -15, -31.8182, -11.3839, -2.82875, 49.6886, -46.5834, -4.8777, 107.367, 31.2154, -43.1864, 15.4575, -14.388, -5.40623, 59.6031, -10, 71.6188, 22.4371, 11.57, 78.7302, 30.5529, -49.9673, -20.0468, -2.30566, -17.4555, 123.863, 7.15001, 79.1963, -31.908, -6.26422, 104.058, 41.9011, -72.0387, -35.849, -1.3478, -10.9546, -14.4569, -6.87999, 46.2045, 18.1878, -10, 81.9883, Policy 3 will be: 39.8382, -60.7004, 36.3881, 41.5632, -10.4908, 6.91106, -10, 106.875, -97.4876, -9.54038, 116.843, 18.7635, -68.1229, -11.9917, 36.206, -7.63326, -50.2664, -10, 25.5986, -22.4697, -4.19706, 106.651, 41.0721, -30.0165, -7.4469, -15, -32.9994, -40.2252, -7.77675, 42.8295, -44.2004, -10, 118.847, 25.1583, -36.7784, 6.02694, -5.82814, -7.401, 75.1206, -2.02077, 68.7314, 27.3913, 4.83711, 71.8604, 31.2232, -40.9324, -24.9639, -6.9411, -14.9926, 114.591, -0.384052, 97.0027, 3.89138, -7.1827, 104.154, 37.6642, -70.1876, -31.9196, -5.86616, -3.15722, -21.1087, -0.134512, 33.8681, 16.1184, -0.489458, 82.8599, Policy 4 will be: 47.2621, -61.7135, 58.2193, 44.4399, -4.68338, 42.3167, -2.24178, 103.033, -66.5547, -5.81948, 108.598, 7.38193, -75, -21.5426, 36.1945, -3.35655, -37.2198, -10, 21.8091, -57.1483, -2.79566, 101.507, 40.6485, -34.8796, -17.4005, -15, -30.8345, -28.2855, -1.32281, 49.0859, -60.2858, -6.84711, 95.0148, 30.1797, -35.1411, -6.35259, -3.50564, -9.2866, 31.2815, -2.05293, 60.358, -7.80594, 19.3182, 70.876, 29.5792, -44.9354, -19.3436, -5.72316, -15.8258, 110.436, 1.28192, 74.3096, -22.288, -10, 99.7208, 45.2961, -75, -38.3441, -12.0242, -3, 15.1687, -7.5569, 48.0976, 18.2844, -10, 93.3733, trial: 0, score: 7 trial: 1, score: 0 Policy 0: 45.7942, -65.8053, 42.4455, 44.7197, -9.36573, 26.551, -10, 112.191, -81.4956, -8.61961, 122, 10.2489, -75, -5.97458, 34.8359, -3.29235, -46.0258, -10, 31.0226, -35.1231, -4.93213, 109.82, 50.4843, -30.7637, -14.746, -13.4217, -35.4539, -25.7659, -10, 47.7756, -47.36, -3.95417, 106.126, 27.2664, -36.5337, -0.183997, -9.221, -9.76956, 52.5198, -2.832, 68.416, 15.9516, 11.7942, 82.5556, 24.2414, -44.5897, -23.5539, -3.36386, -12.3065, 109.851, 2.75744, 82.9145, -7.98624, -5.43237, 107.243, 41.5052, -68.726, -27.5727, -7.09927, -7.05734, 0.395548, -7.34676, 45.9838, 31.7592, -10, 87.1371, Average Score: 3.5 trial: 0, score: 0 trial: 1, score: 3 Policy 1: 47.6833, -60.9224, 33.1986, 45, -8.11415, 30.155, -10, 114.171, -73.0672, -8.19966, 122, 15.2712, -68.0628, -1.41273, 39.2884, -3, -46.284, -4.55977, 30.1138, -28.9885, -8.39214, 95.3857, 42.5082, -31.4579, -2.05385, -15, -35.2197, -10.5643, -4.41705, 46.6649, -49.9369, -2.28316, 97.0553, 30.9929, -41.9987, 1.28711, -5.47896, -13.9374, 40.0262, -5.34869, 63.0207, -2.22282, 15.9162, 72.3204, 21.1993, -37.4079, -19.508, 1.26451, -11.9131, 116.857, 2.50694, 94.444, -1.51795, -10, 114.131, 45.9152, -71.0956, -42.2227, -3.27657, -10.8321, -3.58056, -6.57094, 31.4022, 23.3472, -4.73843, 77.1617, Average Score: 1.5 trial: 0, score: 4 trial: 1, score: 4 Policy 2: 51.386, -70.055, 29.1653, 45, -9.91107, 49.7695, -2.39206, 118.585, -57.3304, -6.64031, 109.214, 12.7694, -75, -2.00889, 33.1026, -3, -51.4548, -10, 34.9571, -41.6483, -2.2086, 99.3573, 56.1282, -35.0164, -9.64023, -15, -31.8182, -11.3839, -2.82875, 49.6886, -46.5834, -4.8777, 107.367, 31.2154, -43.1864, 15.4575, -14.388, -5.40623, 59.6031, -10, 71.6188, 22.4371, 11.57, 78.7302, 30.5529, -49.9673, -20.0468, -2.30566, -17.4555, 123.863, 7.15001, 79.1963, -31.908, -6.26422, 104.058, 41.9011, -72.0387, -35.849, -1.3478, -10.9546, -14.4569, -6.87999, 46.2045, 18.1878, -10, 81.9883, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 39.8382, -60.7004, 36.3881, 41.5632, -10.4908, 6.91106, -10, 106.875, -97.4876, -9.54038, 116.843, 18.7635, -68.1229, -11.9917, 36.206, -7.63326, -50.2664, -10, 25.5986, -22.4697, -4.19706, 106.651, 41.0721, -30.0165, -7.4469, -15, -32.9994, -40.2252, -7.77675, 42.8295, -44.2004, -10, 118.847, 25.1583, -36.7784, 6.02694, -5.82814, -7.401, 75.1206, -2.02077, 68.7314, 27.3913, 4.83711, 71.8604, 31.2232, -40.9324, -24.9639, -6.9411, -14.9926, 114.591, -0.384052, 97.0027, 3.89138, -7.1827, 104.154, 37.6642, -70.1876, -31.9196, -5.86616, -3.15722, -21.1087, -0.134512, 33.8681, 16.1184, -0.489458, 82.8599, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 47.2621, -61.7135, 58.2193, 44.4399, -4.68338, 42.3167, -2.24178, 103.033, -66.5547, -5.81948, 108.598, 7.38193, -75, -21.5426, 36.1945, -3.35655, -37.2198, -10, 21.8091, -57.1483, -2.79566, 101.507, 40.6485, -34.8796, -17.4005, -15, -30.8345, -28.2855, -1.32281, 49.0859, -60.2858, -6.84711, 95.0148, 30.1797, -35.1411, -6.35259, -3.50564, -9.2866, 31.2815, -2.05293, 60.358, -7.80594, 19.3182, 70.876, 29.5792, -44.9354, -19.3436, -5.72316, -15.8258, 110.436, 1.28192, 74.3096, -22.288, -10, 99.7208, 45.2961, -75, -38.3441, -12.0242, -3, 15.1687, -7.5569, 48.0976, 18.2844, -10, 93.3733, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 51.386, -70.055, 29.1653, 45, -9.91107, 49.7695, -2.39206, 118.585, -57.3304, -6.64031, 109.214, 12.7694, -75, -2.00889, 33.1026, -3, -51.4548, -10, 34.9571, -41.6483, -2.2086, 99.3573, 56.1282, -35.0164, -9.64023, -15, -31.8182, -11.3839, -2.82875, 49.6886, -46.5834, -4.8777, 107.367, 31.2154, -43.1864, 15.4575, -14.388, -5.40623, 59.6031, -10, 71.6188, 22.4371, 11.57, 78.7302, 30.5529, -49.9673, -20.0468, -2.30566, -17.4555, 123.863, 7.15001, 79.1963, -31.908, -6.26422, 104.058, 41.9011, -72.0387, -35.849, -1.3478, -10.9546, -14.4569, -6.87999, 46.2045, 18.1878, -10, 81.9883, Current Best Policy Score: 4 Policy 0 will be: 51.386, -70.055, 29.1653, 45, -9.91107, 49.7695, -2.39206, 118.585, -57.3304, -6.64031, 109.214, 12.7694, -75, -2.00889, 33.1026, -3, -51.4548, -10, 34.9571, -41.6483, -2.2086, 99.3573, 56.1282, -35.0164, -9.64023, -15, -31.8182, -11.3839, -2.82875, 49.6886, -46.5834, -4.8777, 107.367, 31.2154, -43.1864, 15.4575, -14.388, -5.40623, 59.6031, -10, 71.6188, 22.4371, 11.57, 78.7302, 30.5529, -49.9673, -20.0468, -2.30566, -17.4555, 123.863, 7.15001, 79.1963, -31.908, -6.26422, 104.058, 41.9011, -72.0387, -35.849, -1.3478, -10.9546, -14.4569, -6.87999, 46.2045, 18.1878, -10, 81.9883, Policy 1 will be: 46.0855, -73.2175, 32.1508, 42.7479, -5.69313, 34.6949, -1.3295, 112.466, -60.0757, -8.15954, 121.628, 19.1395, -68.8249, 11.1908, 28.3575, -7.72034, -33.1789, -0.925998, 38.0742, -48.9143, -5.35378, 93.5582, 58.8587, -28.289, -4.48196, -10.6451, -37.3603, -33.716, -3.83193, 48.2384, -23.7299, 3.69311, 103.314, 40.7887, -41.9644, 6.25371, -8.88292, -10.9796, 62.1385, -0.248849, 79.6744, 34.6866, 11.8137, 84.1642, 31.9426, -46.8219, -20.9839, -6.59334, -18.2582, 130, 14.8686, 66.9521, -9.49902, -10, 114.972, 45.1955, -68.9393, -50.1542, 3.18221, -14.598, -2.32549, -7.33759, 40.4409, 10.7864, -2.33264, 79.7631, Policy 2 will be: 45.8831, -63.755, 22.1004, 45, -4.93025, 27.2976, -0.13912, 120.098, -56.6579, -5.36043, 109.531, 5.91535, -72.6729, -7.09598, 32.3741, -3.63621, -64.4058, -10, 28.2388, -26.5624, 6.76921, 102.445, 59.0093, -37.749, 6.22845, -14.1689, -33.2105, 5.86715, 2.48235, 43.5224, -66.9538, -2.83874, 109.832, 36.5682, -40.2048, 5.31831, -13.5698, -9.69394, 44.7407, -10, 68.9188, 13.3016, 8.80943, 77.5288, 21.4934, -54.1827, -17.6957, -3.18115, -20.5451, 123.402, 5.17638, 79.6735, -49.2085, 0.22072, 110.419, 47.0904, -69.1725, -38.9414, -3.74502, -6.80862, 7.55487, -10, 34.8342, 25.1179, -10, 92.7761, Policy 3 will be: 55.8692, -73.1876, 26.1018, 45, -12.2999, 57.2669, -10, 122, -66.9896, -0.106431, 113.143, 4.67053, -75, -7.56652, 32.5267, -3, -31.1139, -7.07151, 20.9413, -45.9516, 3.5665, 90.1013, 50.2345, -40.6276, -5.50544, -12.5674, -37.3099, 11.5594, -6.81645, 39.4945, -57.7286, -3.73268, 112.174, 22.5255, -40.7276, 26.3483, -15, -10.495, 62.2022, -10, 79.4023, 46.4208, 8.13301, 75.3139, 30.8601, -50.552, -26.3339, 0.752214, -12.3636, 130, 9.69046, 67.3069, -17.5874, -6.67679, 112.912, 38.1565, -75, -26.7102, 0.451988, -12.5569, 2.14045, -9.261, 43.4502, 11.1737, -5.11079, 71.8797, Policy 4 will be: 59.6343, -72.7394, 43.9381, 45, -7.11998, 28.2549, 5.4088, 122, -70.9828, -1.39082, 107.784, 9.44587, -70.1762, -18.6997, 35.4624, -6.96529, -51.8181, -10, 36.5104, -20.084, -2.4836, 85.2361, 48.0875, -39.551, -11.0484, -15, -30.3048, 0.00146519, -10, 49.3653, -66.6218, -2.57897, 108.643, 29.2862, -49.9067, 11.4146, -14.8006, -3, 42.861, -5.19937, 58.1096, 12.1913, 7.11028, 75.3153, 39.1879, -51.6369, -18.8635, -7.99716, -21.2678, 110.605, 8.41078, 72.9943, -32.6945, -4.40721, 100.095, 40.0194, -65.9954, -41.8366, -1.4788, -7.29336, -34.3367, -1.3346, 47.5987, -4.92079, -8.66001, 77.4924, trial: 0, score: 0