New Algorithm, initial policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, trial: 0, score: 7 trial: 1, score: 6 Policy 0: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Average Score: 6.5 trial: 0, score: 4 trial: 1, score: 2 Policy 1: 17.111, -45.1542, -17.0786, -3.62655, -54.1435, 97.991, 5.6367, 49.0132, -38.8083, 1.09212, 86.4158, 18.4861, -24.8698, -78.5039, -4.19338, -4.41723, 2.11241, 13.4871, 40.9519, -65.9754, 11.0444, 105.066, 22.8178, 0, -70.4274, -11.9122, -6.6427, 6.5275, 5.87589, -9.64132, -28.6058, -2.20259, 104.909, 11.3212, -37.9777, 67.022, -4.07396, -21.8082, 70.9574, -2.87115, -19.1233, 34.8263, -3.3872, 122, 1.87723, -29.8019, 61.1862, -12.757, -11.0829, 126.483, -10, 7.97378, 64.6542, 2.24605, 112.556, 33.4422, -13.9062, 6.9416, -9.82993, -13.1819, -21.0651, -0.756616, 99.7815, -82.8677, -10, 114.196, Average Score: 3 trial: 0, score: 4 trial: 1, score: 4 Policy 2: 28.478, -53.8083, -20.131, -5.11098, -50.6347, 64.3516, 12.0763, 57.758, -35.065, -0.149297, 74.7832, 24.6539, -19.5012, -88, -4.17865, -5.12832, 44.3603, 8.24558, 24.1581, -57.813, 7.55998, 119.83, 30.1888, -5.68716, -88, -8.52584, -6.57159, -24.9827, 4.06852, -10.9987, -23.9773, 6.75208, 102.733, 9.775, -36.1116, 40.1618, -2.72104, -28.2467, 52.274, -1.03709, 0.352003, 48.5734, 3.09584, 122, 8.89735, -30.4644, 54.149, -11.8119, -19.9832, 128.144, -6.67468, 5.72338, 65.8202, 13.5408, 97.2886, 22.4707, -16.7891, -4.57684, -15, -14.2104, -9.00242, -0.39643, 122, -48.855, -10, 116.682, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 24.3571, -54.9713, -2.91853, -9.78863, -55, 106.362, 17.5581, 64.9054, -31.487, -1.54978, 83.541, 23.8209, -19.271, -88, 2.07053, -7.55675, 31.4809, -2.83304, 24.6124, -64.9022, 5.4629, 122, 23.8117, 0, -88, -2.83796, -11.5875, 1.11465, -2.18949, -2.23308, -34.0758, -4.6583, 119.449, 11.1366, -39.2684, 52.1975, -2.86553, -24.0128, 87.8147, -0.344078, -25, 48.2599, -9.41624, 108.742, 0, -32.9763, 55.9431, -13.6277, -12.1224, 126.251, -3.50219, 9.00049, 46.8718, -3.55995, 100.2, 23.4261, -12.8847, 16.4436, -15, -19.4943, -38.4733, 4.69899, 108.584, -50.1611, -8.57595, 119.572, Average Score: 0 trial: 0, score: 6 trial: 1, score: 6 Policy 4: 17.0548, -53.213, 0.679424, -2.31655, -52.6166, 69.3287, 14.4965, 42.3758, -56.1361, -9.58737, 78.8131, 32.7897, -18.5379, -73.3842, -5.876, -3.29436, 38.8672, -4.92052, 38.5738, -60.7498, 13.1949, 115.156, 16.2416, -2.46551, -73.4944, -11.7248, -8.02275, 4.24754, 11.5225, -14.5919, -16.7696, -4.73698, 119.116, 17.2626, -44.0999, 45.7427, -8.03948, -21.6342, 61.1332, 8.87463, -5.77697, 41.5221, 5.79114, 122, 3.46222, -27.2782, 54.3194, -9.59866, -19.8555, 112.882, 2.59642, 2.30635, 55.068, 10.857, 107.427, 25.1223, -14.6954, 16.3186, -15, -14.3226, -36.8652, 10.3735, 97.941, -91.4615, -7.64053, 104.326, Average Score: 6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 6.5 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 19.1337, -42.5183, 10.4291, -2.62086, -51.2444, 66.8199, 13.9029, 68.3969, -68.8375, -10, 91.1251, 24.3327, -12.7475, -88, -3.0538, -3, 41.2829, 9.65421, 18.3986, -66.1427, 13.6142, 116.586, 24.8588, -7.11699, -88, -3.46005, -7.29062, -10.2534, -1.52403, -25, -25.8865, 8.83276, 116.67, 23.2823, -33.5519, 71.9875, -5.90189, -21.0358, 78.3083, 3.30357, -7.83141, 59.6649, -8.47282, 107.873, 5.54755, -33.834, 69.499, -15, -14.0113, 108.363, -5.52233, 12.6826, 55.9301, 5.16736, 91.5143, 24.2448, -5.89532, -10.6483, -10.4413, -10.6026, -8.24773, -1.90096, 119.522, -85.4829, -10, 121.678, Policy 2 will be: 18.3754, -51.7013, -0.621882, -7.43488, -50.6162, 64.692, 8.6723, 44.983, -33.8041, -10, 84.435, 34.4654, -12.5227, -88, -3.42261, -7.58913, 41.209, 9.83951, 38.3142, -80.7599, 13.131, 109.524, 29.9094, -1.05632, -88, -6.30435, -14.6037, -17.7444, -0.0805764, -23.0505, -46.5544, -5.80158, 117.286, 4.51832, -35.1205, 42.4254, -2.58869, -19.2552, 56.594, -4.06677, -6.51262, 36.1409, -4.79607, 117.609, 4.03805, -30.5769, 69.6672, -15, -15.7163, 108.052, -1.39774, 14.6395, 39.4924, 7.15665, 96.9115, 36.7314, -14.6526, 12.3591, -15, -17.2069, -34.176, 11.7345, 104.707, -49.0398, -2.52386, 120.901, Policy 3 will be: 23.9324, -42.5797, -3.96083, 0.0988218, -52.4286, 80.3362, 1.47844, 46.5244, -26.1757, -6.92486, 65.3528, 25.5308, -21.5337, -75.3359, 3.05327, -3, 7.30332, 3.77196, 46.209, -56.5111, 16.3493, 121.191, 15.3777, 0, -88, -5.85983, -5.28848, 5.66427, -3.55475, -23.8667, -54.9715, 7.16271, 99.0062, 4.61722, -42.7975, 55.9855, -5.81672, -28.3768, 58.3852, -5.59696, -11.1087, 72.3582, 5.77897, 114.279, 8.34141, -31.9473, 66.3028, -11.8277, -20.7554, 94.3034, -1.86906, 4.97966, 63.4561, 9.77739, 86.2226, 24.1357, -17.4592, 19.464, -10.8194, -18.9974, -25.0963, 1.81256, 113.832, -55.2722, -5.55034, 115.645, Policy 4 will be: 21.0236, -40.0229, -5.11159, -2.30532, -53.6055, 102.075, 17.5863, 68.2855, -62.3149, -5.35828, 84.7902, 26.6236, -22.1091, -82.7125, -4.79964, -7.33086, 24.3928, -0.103492, 44.0177, -87.4055, 7.02861, 110.041, 17.3629, 0, -88, -7.32451, -7.95879, -1.88059, 2.56175, -15.4381, -38.9906, 10.4478, 102.615, 13.7652, -36.8129, 60.0371, -7.6898, -24.6901, 62.5856, -5.92591, -16.3193, 69.5195, 9.2821, 122, 11.261, -31.0734, 49.0869, -14.1721, -15.7812, 130, -10, 0.576953, 46.7688, 9.87839, 105.995, 27.2213, -8.52424, -6.52074, -9.94419, -18.25, -20.9386, 7.39973, 106.58, -52.7661, -7.46168, 109.641, trial: 0, score: 8 trial: 1, score: 7 Policy 0: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Average Score: 7.5 trial: 0, score: 3 trial: 1, score: 3 Policy 1: 19.1337, -42.5183, 10.4291, -2.62086, -51.2444, 66.8199, 13.9029, 68.3969, -68.8375, -10, 91.1251, 24.3327, -12.7475, -88, -3.0538, -3, 41.2829, 9.65421, 18.3986, -66.1427, 13.6142, 116.586, 24.8588, -7.11699, -88, -3.46005, -7.29062, -10.2534, -1.52403, -25, -25.8865, 8.83276, 116.67, 23.2823, -33.5519, 71.9875, -5.90189, -21.0358, 78.3083, 3.30357, -7.83141, 59.6649, -8.47282, 107.873, 5.54755, -33.834, 69.499, -15, -14.0113, 108.363, -5.52233, 12.6826, 55.9301, 5.16736, 91.5143, 24.2448, -5.89532, -10.6483, -10.4413, -10.6026, -8.24773, -1.90096, 119.522, -85.4829, -10, 121.678, Average Score: 3 trial: 0, score: 0 trial: 1, score: 5 Policy 2: 18.3754, -51.7013, -0.621882, -7.43488, -50.6162, 64.692, 8.6723, 44.983, -33.8041, -10, 84.435, 34.4654, -12.5227, -88, -3.42261, -7.58913, 41.209, 9.83951, 38.3142, -80.7599, 13.131, 109.524, 29.9094, -1.05632, -88, -6.30435, -14.6037, -17.7444, -0.0805764, -23.0505, -46.5544, -5.80158, 117.286, 4.51832, -35.1205, 42.4254, -2.58869, -19.2552, 56.594, -4.06677, -6.51262, 36.1409, -4.79607, 117.609, 4.03805, -30.5769, 69.6672, -15, -15.7163, 108.052, -1.39774, 14.6395, 39.4924, 7.15665, 96.9115, 36.7314, -14.6526, 12.3591, -15, -17.2069, -34.176, 11.7345, 104.707, -49.0398, -2.52386, 120.901, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 23.9324, -42.5797, -3.96083, 0.0988218, -52.4286, 80.3362, 1.47844, 46.5244, -26.1757, -6.92486, 65.3528, 25.5308, -21.5337, -75.3359, 3.05327, -3, 7.30332, 3.77196, 46.209, -56.5111, 16.3493, 121.191, 15.3777, 0, -88, -5.85983, -5.28848, 5.66427, -3.55475, -23.8667, -54.9715, 7.16271, 99.0062, 4.61722, -42.7975, 55.9855, -5.81672, -28.3768, 58.3852, -5.59696, -11.1087, 72.3582, 5.77897, 114.279, 8.34141, -31.9473, 66.3028, -11.8277, -20.7554, 94.3034, -1.86906, 4.97966, 63.4561, 9.77739, 86.2226, 24.1357, -17.4592, 19.464, -10.8194, -18.9974, -25.0963, 1.81256, 113.832, -55.2722, -5.55034, 115.645, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 21.0236, -40.0229, -5.11159, -2.30532, -53.6055, 102.075, 17.5863, 68.2855, -62.3149, -5.35828, 84.7902, 26.6236, -22.1091, -82.7125, -4.79964, -7.33086, 24.3928, -0.103492, 44.0177, -87.4055, 7.02861, 110.041, 17.3629, 0, -88, -7.32451, -7.95879, -1.88059, 2.56175, -15.4381, -38.9906, 10.4478, 102.615, 13.7652, -36.8129, 60.0371, -7.6898, -24.6901, 62.5856, -5.92591, -16.3193, 69.5195, 9.2821, 122, 11.261, -31.0734, 49.0869, -14.1721, -15.7812, 130, -10, 0.576953, 46.7688, 9.87839, 105.995, 27.2213, -8.52424, -6.52074, -9.94419, -18.25, -20.9386, 7.39973, 106.58, -52.7661, -7.46168, 109.641, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 7.5 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 18.4879, -54.6282, 12.7412, -7.09562, -54.9027, 62.561, 18.3096, 50.577, -68.404, -7.73851, 90.2861, 28.4977, -21.3043, -82.6401, -2.92644, -3, 25.9709, 5.55018, 22.998, -55.2835, 10.0336, 122, 32.771, 0, -71.7103, -8.07728, -8.15433, 10.1287, -3.29183, -18.0032, -43.7413, 2.79358, 120.09, 13.5858, -41.014, 63.0705, -1.23076, -25.0966, 58.5371, 2.12279, -20.1918, 38.1843, -8.84344, 109.139, 7.244, -23.6847, 52.0389, -9.40202, -11.5753, 102.268, -10, 8.28739, 62.3462, 5.10279, 101.016, 37.3883, -10.792, -6.48842, -12.1799, -10.6565, -38.3199, 12.6342, 111.319, -85.0308, -1.04994, 101.849, Policy 2 will be: 17.9823, -53.2839, 10.7451, -8.07566, -54.6889, 65.3415, 3.5548, 64.0522, -57.3893, -10, 64.1159, 32.3517, -21.3855, -81.1315, -3.53808, -7.9006, 0.524318, 8.64714, 46.1353, -65.1639, 7.25301, 120.633, 20.0815, 0, -88, -8.76293, -4.88729, 8.35656, 13.3533, -23.748, -47.5289, 2.96788, 118.424, 19.9396, -33.3258, 62.9638, 0.0643066, -27.9592, 69.8258, 7.22171, -15.0292, 50.2903, 5.9992, 112.485, 3.53078, -29.3877, 48.421, -11.0254, -10.5338, 117.335, -10, -11.1267, 45.8439, -1.53238, 99.983, 34.8819, -12.8869, -8.13951, -15, -18.9777, -39.4598, 0.269949, 98.0082, -92.1528, -10, 108.715, Policy 3 will be: 32.6543, -42.6084, -3.85428, -9.52409, -55, 87.696, 15.0819, 64.7609, -68.9413, -6.47362, 88.507, 23.0928, -24.6235, -88, -3.1014, -4.77932, 35.5796, 7.59307, 25.6267, -67.0421, 11.0599, 122, 26.951, -2.18982, -82.5299, -3.95277, -7.30685, -22.0176, 1.76251, -10.8266, -40.5992, 11.1214, 99.368, 23.1492, -41.5813, 72.281, -0.0214667, -23.5635, 76.8988, 2.55875, -16.5544, 66.6915, -7.63981, 122, 6.393, -32.7893, 41.7902, -15, -19.8254, 94.7173, -0.462807, -10.9846, 26.978, 4.27769, 84.5859, 35.3051, -11.0942, -5.60015, -12.3174, -14.913, -22.464, 14.0429, 99.1267, -44.55, -7.57766, 112.913, Policy 4 will be: 15.4865, -46.938, -15.7332, -0.832418, -55, 102.259, 7.61813, 63.0266, -37.4573, -8.70133, 74.7331, 18.9984, -19.6629, -88, 2.44796, -3, 4.99838, 6.81929, 38.6571, -75.1279, 14.9473, 115.557, 30.0898, -7.83391, -88, -7.1074, -12.8097, 8.80843, -4.17427, -0.58168, -41.8983, 4.64704, 96.8941, 4.75978, -39.0078, 55.981, 0.483016, -26.2032, 90.7724, 7.94116, -15.2927, 80.1878, 9.27104, 119.39, 0, -28.3424, 51.8966, -9.04993, -9.51481, 94.7754, -4.44978, 3.44665, 39.5502, -2.12624, 100.586, 33.8332, -19.6453, 8.50461, -15, -16.6556, -36.2841, 4.38213, 120.929, -56.4491, -3.35973, 99.4752, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Average Score: 6 trial: 0, score: 3 trial: 1, score: 5 Policy 1: 18.4879, -54.6282, 12.7412, -7.09562, -54.9027, 62.561, 18.3096, 50.577, -68.404, -7.73851, 90.2861, 28.4977, -21.3043, -82.6401, -2.92644, -3, 25.9709, 5.55018, 22.998, -55.2835, 10.0336, 122, 32.771, 0, -71.7103, -8.07728, -8.15433, 10.1287, -3.29183, -18.0032, -43.7413, 2.79358, 120.09, 13.5858, -41.014, 63.0705, -1.23076, -25.0966, 58.5371, 2.12279, -20.1918, 38.1843, -8.84344, 109.139, 7.244, -23.6847, 52.0389, -9.40202, -11.5753, 102.268, -10, 8.28739, 62.3462, 5.10279, 101.016, 37.3883, -10.792, -6.48842, -12.1799, -10.6565, -38.3199, 12.6342, 111.319, -85.0308, -1.04994, 101.849, Average Score: 4 trial: 0, score: 2 trial: 1, score: 6 Policy 2: 17.9823, -53.2839, 10.7451, -8.07566, -54.6889, 65.3415, 3.5548, 64.0522, -57.3893, -10, 64.1159, 32.3517, -21.3855, -81.1315, -3.53808, -7.9006, 0.524318, 8.64714, 46.1353, -65.1639, 7.25301, 120.633, 20.0815, 0, -88, -8.76293, -4.88729, 8.35656, 13.3533, -23.748, -47.5289, 2.96788, 118.424, 19.9396, -33.3258, 62.9638, 0.0643066, -27.9592, 69.8258, 7.22171, -15.0292, 50.2903, 5.9992, 112.485, 3.53078, -29.3877, 48.421, -11.0254, -10.5338, 117.335, -10, -11.1267, 45.8439, -1.53238, 99.983, 34.8819, -12.8869, -8.13951, -15, -18.9777, -39.4598, 0.269949, 98.0082, -92.1528, -10, 108.715, Average Score: 4 trial: 0, score: 6 trial: 1, score: 6 Policy 3: 32.6543, -42.6084, -3.85428, -9.52409, -55, 87.696, 15.0819, 64.7609, -68.9413, -6.47362, 88.507, 23.0928, -24.6235, -88, -3.1014, -4.77932, 35.5796, 7.59307, 25.6267, -67.0421, 11.0599, 122, 26.951, -2.18982, -82.5299, -3.95277, -7.30685, -22.0176, 1.76251, -10.8266, -40.5992, 11.1214, 99.368, 23.1492, -41.5813, 72.281, -0.0214667, -23.5635, 76.8988, 2.55875, -16.5544, 66.6915, -7.63981, 122, 6.393, -32.7893, 41.7902, -15, -19.8254, 94.7173, -0.462807, -10.9846, 26.978, 4.27769, 84.5859, 35.3051, -11.0942, -5.60015, -12.3174, -14.913, -22.464, 14.0429, 99.1267, -44.55, -7.57766, 112.913, Average Score: 6 trial: 0, score: 1 trial: 1, score: 0 Policy 4: 15.4865, -46.938, -15.7332, -0.832418, -55, 102.259, 7.61813, 63.0266, -37.4573, -8.70133, 74.7331, 18.9984, -19.6629, -88, 2.44796, -3, 4.99838, 6.81929, 38.6571, -75.1279, 14.9473, 115.557, 30.0898, -7.83391, -88, -7.1074, -12.8097, 8.80843, -4.17427, -0.58168, -41.8983, 4.64704, 96.8941, 4.75978, -39.0078, 55.981, 0.483016, -26.2032, 90.7724, 7.94116, -15.2927, 80.1878, 9.27104, 119.39, 0, -28.3424, 51.8966, -9.04993, -9.51481, 94.7754, -4.44978, 3.44665, 39.5502, -2.12624, 100.586, 33.8332, -19.6453, 8.50461, -15, -16.6556, -36.2841, 4.38213, 120.929, -56.4491, -3.35973, 99.4752, Average Score: 0.5 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 6 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 28.97, -48.5078, -0.624845, -0.131828, -54.0611, 106.982, 6.58211, 45.0007, -37.6218, -9.80964, 83.3934, 24.9412, -20.6542, -88, 1.54491, -7.28702, 6.34695, -3.23689, 25.9426, -64.8294, 8.49816, 105.225, 29.2591, 0, -71.9446, -2.32804, -7.29042, 19.2623, 7.51048, -10.2247, -29.1337, -1.96222, 96.6584, 11.7375, -34.2662, 54.9348, -5.17083, -19.511, 55.5269, 6.51565, -21.0264, 70.5771, 3.2975, 122, 0, -26.7015, 59.6709, -15, -11.1616, 117.308, 0.332802, 13.7737, 41.0397, 0.863421, 88.4109, 39.5329, -12.455, 22.7981, -15, -12.8255, -1.61256, 8.04049, 119.374, -92.7635, -8.51832, 105.492, Policy 2 will be: 16.1267, -51.8234, 7.20617, -4.0199, -55, 79.029, 16.1724, 60.2288, -58.713, -10, 81.5303, 25.2434, -15.574, -88, -2.28083, -5.3263, 2.94508, -1.18109, 26.9191, -87.96, 4.43474, 122, 15.9001, 0, -85.4141, -13.2045, -5.95521, 16.5456, 2.65319, -20.5212, -18.6627, 1.6358, 99.0442, 12.7504, -31.0922, 59.6396, -2.71926, -23.5947, 67.288, -3.98028, -2.4068, 46.3838, 1.55406, 122, 0, -22.4475, 63.3327, -15, -15.8804, 127.569, -9.00723, -5.57632, 43.9287, 12.313, 90.3174, 37.1093, -17.3617, 10.725, -15, -16.1081, -33.4243, -3.23115, 99.9933, -72.3591, -1.37306, 108.62, Policy 3 will be: 29.7606, -40.409, 10.8369, -5.38538, -55, 79.1889, 2.21385, 45.5197, -42.3221, -8.77487, 91.1671, 31.8246, -20.7104, -76.5867, 4.39987, -4.64975, 44.9002, 0.613483, 42.1996, -53.2016, 13.4114, 121.406, 15.5232, -7.22317, -87.7922, -12.7641, -12.3655, 13.9854, 2.90088, -14.7994, -27.0084, 2.27787, 103.147, 15.0213, -32.0444, 56.9437, -1.26129, -19.3061, 71.9053, 8.08023, -13.6599, 43.0311, -6.62635, 122, 0, -26.8767, 54.7584, -15, -16.9604, 92.1468, -8.29874, 6.84672, 43.394, -4.11044, 87.6856, 36.8514, -11.4254, 18.1607, -9.97231, -12.6373, -37.25, -0.0526414, 122, -60.6408, -10, 99.3305, Policy 4 will be: 23.4022, -53.1298, -16.3891, -10.2093, -55, 72.7526, 16.0686, 54.2319, -33.6489, -0.0471843, 74.5832, 18.8332, -14.0894, -86.8196, -2.74738, -8.26378, 24.693, 2.51875, 18.3616, -41.4117, 5.15225, 114.702, 32.5495, -6.97662, -71.613, -9.65761, -10.0527, -24.5429, -3.42421, -4.72527, -19.7563, 5.51542, 119.76, 15.3975, -32.8609, 60.4409, 1.79507, -28.9447, 57.06, -4.60358, -5.12325, 62.1105, -3.11108, 122, 8.50585, -31.0721, 61.3091, -11.3521, -19.9456, 114.404, 0.482697, 4.21992, 29.7003, -2.69914, 94.8602, 32.6065, -17.5798, 12.6442, -11.0263, -21.409, -8.16769, 14.2575, 119.977, -78.2285, -10, 101.907, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Average Score: 6 trial: 0, score: 2 trial: 1, score: 3 Policy 1: 28.97, -48.5078, -0.624845, -0.131828, -54.0611, 106.982, 6.58211, 45.0007, -37.6218, -9.80964, 83.3934, 24.9412, -20.6542, -88, 1.54491, -7.28702, 6.34695, -3.23689, 25.9426, -64.8294, 8.49816, 105.225, 29.2591, 0, -71.9446, -2.32804, -7.29042, 19.2623, 7.51048, -10.2247, -29.1337, -1.96222, 96.6584, 11.7375, -34.2662, 54.9348, -5.17083, -19.511, 55.5269, 6.51565, -21.0264, 70.5771, 3.2975, 122, 0, -26.7015, 59.6709, -15, -11.1616, 117.308, 0.332802, 13.7737, 41.0397, 0.863421, 88.4109, 39.5329, -12.455, 22.7981, -15, -12.8255, -1.61256, 8.04049, 119.374, -92.7635, -8.51832, 105.492, Average Score: 2.5 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 16.1267, -51.8234, 7.20617, -4.0199, -55, 79.029, 16.1724, 60.2288, -58.713, -10, 81.5303, 25.2434, -15.574, -88, -2.28083, -5.3263, 2.94508, -1.18109, 26.9191, -87.96, 4.43474, 122, 15.9001, 0, -85.4141, -13.2045, -5.95521, 16.5456, 2.65319, -20.5212, -18.6627, 1.6358, 99.0442, 12.7504, -31.0922, 59.6396, -2.71926, -23.5947, 67.288, -3.98028, -2.4068, 46.3838, 1.55406, 122, 0, -22.4475, 63.3327, -15, -15.8804, 127.569, -9.00723, -5.57632, 43.9287, 12.313, 90.3174, 37.1093, -17.3617, 10.725, -15, -16.1081, -33.4243, -3.23115, 99.9933, -72.3591, -1.37306, 108.62, Average Score: 0 trial: 0, score: 1 trial: 1, score: 2 Policy 3: 29.7606, -40.409, 10.8369, -5.38538, -55, 79.1889, 2.21385, 45.5197, -42.3221, -8.77487, 91.1671, 31.8246, -20.7104, -76.5867, 4.39987, -4.64975, 44.9002, 0.613483, 42.1996, -53.2016, 13.4114, 121.406, 15.5232, -7.22317, -87.7922, -12.7641, -12.3655, 13.9854, 2.90088, -14.7994, -27.0084, 2.27787, 103.147, 15.0213, -32.0444, 56.9437, -1.26129, -19.3061, 71.9053, 8.08023, -13.6599, 43.0311, -6.62635, 122, 0, -26.8767, 54.7584, -15, -16.9604, 92.1468, -8.29874, 6.84672, 43.394, -4.11044, 87.6856, 36.8514, -11.4254, 18.1607, -9.97231, -12.6373, -37.25, -0.0526414, 122, -60.6408, -10, 99.3305, Average Score: 1.5 trial: 0, score: 4 trial: 1, score: 3 Policy 4: 23.4022, -53.1298, -16.3891, -10.2093, -55, 72.7526, 16.0686, 54.2319, -33.6489, -0.0471843, 74.5832, 18.8332, -14.0894, -86.8196, -2.74738, -8.26378, 24.693, 2.51875, 18.3616, -41.4117, 5.15225, 114.702, 32.5495, -6.97662, -71.613, -9.65761, -10.0527, -24.5429, -3.42421, -4.72527, -19.7563, 5.51542, 119.76, 15.3975, -32.8609, 60.4409, 1.79507, -28.9447, 57.06, -4.60358, -5.12325, 62.1105, -3.11108, 122, 8.50585, -31.0721, 61.3091, -11.3521, -19.9456, 114.404, 0.482697, 4.21992, 29.7003, -2.69914, 94.8602, 32.6065, -17.5798, 12.6442, -11.0263, -21.409, -8.16769, 14.2575, 119.977, -78.2285, -10, 101.907, Average Score: 3.5 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 6 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 14.3894, -52.5202, 11.1349, -5.6656, -51.3787, 67.4224, 10.4227, 56.522, -58.3583, -9.6025, 79.5326, 25.286, -14.1708, -73.1306, 2.58176, -6.52916, 40.1031, 6.60949, 33.1709, -59.0604, 16.8182, 112.625, 15.7496, 0, -88, -13.7416, -6.3059, 7.04425, 9.01278, -3.15704, -17.9295, 7.70326, 109.145, 7.28288, -34.4175, 49.7941, -9.43773, -21.4824, 73.8878, -5.11802, 2.57002, 70.1661, 4.53367, 122, 0, -29.975, 36.6919, -15, -16.4275, 103.607, -8.25634, -7.28855, 46.512, 8.15706, 88.8035, 22.4891, -19.0844, -0.757691, -12.1997, -15.9721, -9.03408, 2.52265, 107.877, -91.1785, -10, 101.051, Policy 2 will be: 33.0887, -46.5435, -16.0401, -6.46562, -55, 89.3676, 15.0818, 41.543, -44.52, -9.90952, 78.072, 36.0378, -19.1565, -70.6444, 4.50964, -3, 26.1048, 2.37337, 37.8905, -53.3368, 1.822, 107.327, 22.2224, -5.6624, -88, -11.8891, -10.0137, 11.589, 12.5177, -22.4236, -21.8834, -3.64571, 103.08, 14.8206, -32.3905, 49.2996, -9.27081, -24.8279, 88.5874, 11.8498, -2.50743, 76.2521, -2.77108, 122, 0.789477, -32.2783, 47.1235, -11.9839, -19.0832, 106.609, 4.4037, 10.5014, 71.9469, -2.53581, 98.1907, 38.9291, -15.4093, 2.67482, -14.4448, -11.5099, -8.0241, 0.454626, 104.786, -78.953, -3.81078, 94.4449, Policy 3 will be: 21.8124, -40.8983, -2.70282, -9.86558, -52.0885, 96.1082, 15.0706, 65.0129, -45.7794, -8.82702, 89.2677, 27.959, -24.041, -82.9867, -2.23684, -5.05029, 44.7313, 13.237, 36.3705, -64.6599, 8.31019, 109.942, 17.1404, 0, -84.5349, -7.54242, -6.21644, 10.7755, 13.7445, 0.804342, -40.4476, 3.83324, 110.223, 20.5484, -38.6064, 66.8491, -0.463876, -28.7181, 47.9548, 12.445, -0.134138, 80.9956, -0.705677, 122, 0, -27.3538, 61.8574, -13.002, -20.2053, 89.9341, -3.96201, 0.073931, 68.1476, 13.001, 111.866, 35.4661, -18.8945, 12.8822, -15, -18.5638, -37.3209, 10.9267, 112.721, -92.1218, -3.55892, 97.7066, Policy 4 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, trial: 0, score: 7 trial: 1, score: 6 Policy 0: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Average Score: 6.5 trial: 0, score: 0 trial: 1, score: 2 Policy 1: 14.3894, -52.5202, 11.1349, -5.6656, -51.3787, 67.4224, 10.4227, 56.522, -58.3583, -9.6025, 79.5326, 25.286, -14.1708, -73.1306, 2.58176, -6.52916, 40.1031, 6.60949, 33.1709, -59.0604, 16.8182, 112.625, 15.7496, 0, -88, -13.7416, -6.3059, 7.04425, 9.01278, -3.15704, -17.9295, 7.70326, 109.145, 7.28288, -34.4175, 49.7941, -9.43773, -21.4824, 73.8878, -5.11802, 2.57002, 70.1661, 4.53367, 122, 0, -29.975, 36.6919, -15, -16.4275, 103.607, -8.25634, -7.28855, 46.512, 8.15706, 88.8035, 22.4891, -19.0844, -0.757691, -12.1997, -15.9721, -9.03408, 2.52265, 107.877, -91.1785, -10, 101.051, Average Score: 1 trial: 0, score: 6 trial: 1, score: 7 Policy 2: 33.0887, -46.5435, -16.0401, -6.46562, -55, 89.3676, 15.0818, 41.543, -44.52, -9.90952, 78.072, 36.0378, -19.1565, -70.6444, 4.50964, -3, 26.1048, 2.37337, 37.8905, -53.3368, 1.822, 107.327, 22.2224, -5.6624, -88, -11.8891, -10.0137, 11.589, 12.5177, -22.4236, -21.8834, -3.64571, 103.08, 14.8206, -32.3905, 49.2996, -9.27081, -24.8279, 88.5874, 11.8498, -2.50743, 76.2521, -2.77108, 122, 0.789477, -32.2783, 47.1235, -11.9839, -19.0832, 106.609, 4.4037, 10.5014, 71.9469, -2.53581, 98.1907, 38.9291, -15.4093, 2.67482, -14.4448, -11.5099, -8.0241, 0.454626, 104.786, -78.953, -3.81078, 94.4449, Average Score: 6.5 trial: 0, score: 6 trial: 1, score: 6 Policy 3: 21.8124, -40.8983, -2.70282, -9.86558, -52.0885, 96.1082, 15.0706, 65.0129, -45.7794, -8.82702, 89.2677, 27.959, -24.041, -82.9867, -2.23684, -5.05029, 44.7313, 13.237, 36.3705, -64.6599, 8.31019, 109.942, 17.1404, 0, -84.5349, -7.54242, -6.21644, 10.7755, 13.7445, 0.804342, -40.4476, 3.83324, 110.223, 20.5484, -38.6064, 66.8491, -0.463876, -28.7181, 47.9548, 12.445, -0.134138, 80.9956, -0.705677, 122, 0, -27.3538, 61.8574, -13.002, -20.2053, 89.9341, -3.96201, 0.073931, 68.1476, 13.001, 111.866, 35.4661, -18.8945, 12.8822, -15, -18.5638, -37.3209, 10.9267, 112.721, -92.1218, -3.55892, 97.7066, Average Score: 6 trial: 0, score: 7 trial: 1, score: 7 Policy 4: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 7 --------------------------------- New Iteration Current Best Policy: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Current Best Policy Score: 7 Policy 0 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Policy 1 will be: 26.0725, -39.8457, -2.31497, 0.0583408, -48.257, 72.7599, 11.3255, 62.0176, -5.77842, -10, 70.8307, 33.4334, -21.4836, -73.4538, 1.09068, -6.80354, -0.612631, 14.1402, 46.5445, -69.4682, 10.4624, 122, 21.9345, -7.84237, -88, -15, -11.2931, -20.5444, 17.8281, -5.21359, -42.3068, 5.94551, 103.406, 14.2183, -43.5152, 67.9373, -2.91017, -21.4339, 39.0791, 6.77425, 1.10374, 66.8363, 12.7386, 122, 1.98074, -30.774, 47.8549, -10.8426, -13.6326, 91.1468, -5.75358, 8.96796, 33.0324, 7.77709, 97.0073, 33.7331, -8.26496, 32.8093, -15, -17.813, -4.72337, 7.63335, 96.6744, -63.0868, 3.52259, 122, Policy 2 will be: 34.0458, -41.9302, -7.56358, -1.64317, -55, 102.668, 5.46818, 67.2357, -10.4611, -5.21354, 87.8472, 28.6683, -9.00375, -57.8898, 7.16564, -8.21699, 4.56603, 10.5301, 60.639, -68.0236, 10.1127, 122, 23.4858, 0, -88, -13.7406, -7.98021, -32.0533, 1.32179, -10.7202, -16.1334, 4.89857, 92.4001, 10.5501, -48.7372, 54.6112, -5.98933, -12.9508, 80.0401, 2.03251, -23.6877, 59.3273, 8.82897, 122, 14.4087, -32.6605, 61.1062, -15, -13.4601, 88.1924, 0.974102, 7.72485, 59.0189, 20.6142, 83.2903, 24.6219, -9.96676, 37.5396, -12.4381, -19.4476, -11.0303, 6.31063, 101.043, -32.404, -9.41104, 122, Policy 3 will be: 33.9568, -46.6914, -8.30767, 2.30398, -51.9161, 94.8752, 5.14525, 67.0492, -27.4536, -10, 86.7256, 26.6854, -21.849, -56.5196, 0.807813, -4.35789, -13.1189, 2.37614, 38.2026, -93.8627, 6.45464, 122, 30.7005, -2.4671, -80.7078, -7.23583, -10.0773, -31.8928, 15.7775, -6.71371, -44.9788, 11.1964, 100.971, 21.0998, -41.298, 60.8566, -4.49397, -16.4789, 79.4199, 1.62726, -12.4371, 48.7079, 15.6909, 122, 10.7377, -33.8972, 44.4446, -15, -14.7396, 92.4342, 0.728884, 1.51897, 73.375, 19.6036, 78.4039, 27.6262, -1.70763, 9.01348, -15, -15.4708, 2.12536, 0.62344, 122, -43.1359, 5.08286, 116.469, Policy 4 will be: 39.6343, -47.4519, -18.8377, -6.92389, -47.6463, 77.9568, 1.68654, 65.1169, -31.6329, -6.14895, 68.9074, 15.0468, -19.6259, -64.6063, -3.21215, -3, 19.33, 14.3483, 59.3306, -97.8949, 14.1116, 107.748, 32.5099, -9.21484, -87.3861, -8.34871, -14.7323, -15.9248, 9.22637, -16.6625, -30.2164, 17.904, 81.7276, 13.149, -40.7984, 58.938, -5.73896, -22.3073, 81.5032, 4.69963, -12.8975, 79.938, 11.7179, 116.003, 16.1257, -35.9833, 48.7463, -15, -21.8339, 110.929, 1.00801, 3.81018, 29.6868, 6.44869, 94.0931, 24.2456, -15.2688, 20.396, -15, -15.6498, -8.39799, -8.78324, 97.0516, -36.8637, 2.18314, 111.017, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 6 trial: 0, score: 3 trial: 1, score: 4 Policy 1: 26.0725, -39.8457, -2.31497, 0.0583408, -48.257, 72.7599, 11.3255, 62.0176, -5.77842, -10, 70.8307, 33.4334, -21.4836, -73.4538, 1.09068, -6.80354, -0.612631, 14.1402, 46.5445, -69.4682, 10.4624, 122, 21.9345, -7.84237, -88, -15, -11.2931, -20.5444, 17.8281, -5.21359, -42.3068, 5.94551, 103.406, 14.2183, -43.5152, 67.9373, -2.91017, -21.4339, 39.0791, 6.77425, 1.10374, 66.8363, 12.7386, 122, 1.98074, -30.774, 47.8549, -10.8426, -13.6326, 91.1468, -5.75358, 8.96796, 33.0324, 7.77709, 97.0073, 33.7331, -8.26496, 32.8093, -15, -17.813, -4.72337, 7.63335, 96.6744, -63.0868, 3.52259, 122, Average Score: 3.5 trial: 0, score: 1 trial: 1, score: 0 Policy 2: 34.0458, -41.9302, -7.56358, -1.64317, -55, 102.668, 5.46818, 67.2357, -10.4611, -5.21354, 87.8472, 28.6683, -9.00375, -57.8898, 7.16564, -8.21699, 4.56603, 10.5301, 60.639, -68.0236, 10.1127, 122, 23.4858, 0, -88, -13.7406, -7.98021, -32.0533, 1.32179, -10.7202, -16.1334, 4.89857, 92.4001, 10.5501, -48.7372, 54.6112, -5.98933, -12.9508, 80.0401, 2.03251, -23.6877, 59.3273, 8.82897, 122, 14.4087, -32.6605, 61.1062, -15, -13.4601, 88.1924, 0.974102, 7.72485, 59.0189, 20.6142, 83.2903, 24.6219, -9.96676, 37.5396, -12.4381, -19.4476, -11.0303, 6.31063, 101.043, -32.404, -9.41104, 122, Average Score: 0.5 trial: 0, score: 4 trial: 1, score: 5 Policy 3: 33.9568, -46.6914, -8.30767, 2.30398, -51.9161, 94.8752, 5.14525, 67.0492, -27.4536, -10, 86.7256, 26.6854, -21.849, -56.5196, 0.807813, -4.35789, -13.1189, 2.37614, 38.2026, -93.8627, 6.45464, 122, 30.7005, -2.4671, -80.7078, -7.23583, -10.0773, -31.8928, 15.7775, -6.71371, -44.9788, 11.1964, 100.971, 21.0998, -41.298, 60.8566, -4.49397, -16.4789, 79.4199, 1.62726, -12.4371, 48.7079, 15.6909, 122, 10.7377, -33.8972, 44.4446, -15, -14.7396, 92.4342, 0.728884, 1.51897, 73.375, 19.6036, 78.4039, 27.6262, -1.70763, 9.01348, -15, -15.4708, 2.12536, 0.62344, 122, -43.1359, 5.08286, 116.469, Average Score: 4.5 trial: 0, score: 6 trial: 1, score: 5 Policy 4: 39.6343, -47.4519, -18.8377, -6.92389, -47.6463, 77.9568, 1.68654, 65.1169, -31.6329, -6.14895, 68.9074, 15.0468, -19.6259, -64.6063, -3.21215, -3, 19.33, 14.3483, 59.3306, -97.8949, 14.1116, 107.748, 32.5099, -9.21484, -87.3861, -8.34871, -14.7323, -15.9248, 9.22637, -16.6625, -30.2164, 17.904, 81.7276, 13.149, -40.7984, 58.938, -5.73896, -22.3073, 81.5032, 4.69963, -12.8975, 79.938, 11.7179, 116.003, 16.1257, -35.9833, 48.7463, -15, -21.8339, 110.929, 1.00801, 3.81018, 29.6868, 6.44869, 94.0931, 24.2456, -15.2688, 20.396, -15, -15.6498, -8.39799, -8.78324, 97.0516, -36.8637, 2.18314, 111.017, Average Score: 5.5 --------------------------------- New Iteration Current Best Policy: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Current Best Policy Score: 6 Policy 0 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Policy 1 will be: 28.4994, -43.855, -5.24968, 2.14211, -53.2114, 87.2625, 15.658, 77.8757, -37.1533, -1.02088, 86.4923, 20.0224, -20.1988, -54.7145, -3.13847, -7.68827, 27.3897, 12.9292, 42.6845, -64.6353, 10.8929, 113.59, 19.3913, -8.94866, -81.1943, -14.7029, -4.62307, -41.0053, 18.89, -1.13797, -33.4221, 7.52736, 91.6044, 20.0282, -38.6995, 69.5503, -6.6705, -16.5796, 50.7834, 9.5169, -19.9483, 66.5025, 0.452398, 122, 9.62628, -30.4909, 52.0234, -12.1716, -13.7836, 113.074, 1.70738, -12.3951, 45.9137, 16.1012, 97.8065, 38.2835, -7.40422, 21.9051, -15, -18.5791, 14.8038, -2.08156, 104.359, -29.2016, -10, 122, Policy 2 will be: 27.1095, -43.5483, -19.8121, -7.24813, -55, 106.221, 8.57989, 57.354, -9.56788, -7.16121, 87.5314, 22.2983, -21.0667, -65.4055, -0.513029, -4.17145, 14.846, 9.15334, 49.8413, -64.6125, 5.48071, 122, 25.4201, -2.89656, -83.9501, -12.9828, -13.4745, -34.6933, 18.6637, 12.255, -9.94091, 7.754, 101.778, 21.3721, -50.87, 67.8681, 0.466859, -18.5726, 57.7453, 7.61779, -23.9584, 75.6215, 7.84437, 122, 0, -28.7649, 38.4428, -15, -13.3283, 130, -4.56926, 7.67173, 43.3776, 2.58033, 80.602, 34.4109, -14.5527, 38.5903, -12.8109, -18.6658, 23.3748, 2.05496, 96.5049, -66.191, -4.52591, 122, Policy 3 will be: 26.1856, -45.16, -18.8025, -1.0685, -55, 85.1483, 3.90029, 54.9754, -29.9592, -10, 67.9612, 14.4546, -14.9127, -55.7099, 2.64739, -3, 26.2497, 11.438, 48.4373, -55.962, 10.7982, 110.585, 24.4887, -8.90912, -88, -15, -8.30428, -45.952, 7.82318, 7.78808, -9.57285, 13.03, 85.3141, 16.3263, -37.724, 42.71, -2.39728, -19.7697, 73.5638, 1.90367, -23.8008, 81.7525, 18.4964, 108.524, 10.6067, -40.5622, 59.8453, -15, -12.6392, 89.9307, -1.68026, -8.41968, 28.3277, 6.83508, 96.2681, 24.0823, -1.44672, 18.0609, -15, -19.1491, 26.9283, -9.00547, 122, -70.4796, -1.41721, 122, Policy 4 will be: 38.6296, -39.7794, -15.6403, -8.81293, -55, 95.2554, 6.58405, 51.7684, -30.6268, -8.23769, 80.3325, 18.111, -7.90779, -53.5078, 0.748919, -3, 13.1804, 9.14985, 57.0635, -84.4099, 16.696, 118.755, 23.168, 0, -77.0525, -12.8959, -14.8679, -41.5674, 1.66747, -9.55146, -44.6462, 15.9433, 92.9586, 27.2951, -48.1925, 63.469, -8.37898, -15.244, 58.9144, 3.86391, -22.9577, 67.0537, 13.0287, 110.63, 0, -39.0279, 35.4184, -15, -21.875, 112.426, -5.93161, 0.94216, 66.7634, 15.002, 82.333, 27.7596, -5.46418, 20.8252, -11.35, -22.6678, 0.0839482, 9.16665, 106.698, -57.6226, -10, 116.939, trial: 0, score: 7 trial: 1, score: 7 Policy 0: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 7 trial: 0, score: 4 trial: 1, score: 5 Policy 1: 28.4994, -43.855, -5.24968, 2.14211, -53.2114, 87.2625, 15.658, 77.8757, -37.1533, -1.02088, 86.4923, 20.0224, -20.1988, -54.7145, -3.13847, -7.68827, 27.3897, 12.9292, 42.6845, -64.6353, 10.8929, 113.59, 19.3913, -8.94866, -81.1943, -14.7029, -4.62307, -41.0053, 18.89, -1.13797, -33.4221, 7.52736, 91.6044, 20.0282, -38.6995, 69.5503, -6.6705, -16.5796, 50.7834, 9.5169, -19.9483, 66.5025, 0.452398, 122, 9.62628, -30.4909, 52.0234, -12.1716, -13.7836, 113.074, 1.70738, -12.3951, 45.9137, 16.1012, 97.8065, 38.2835, -7.40422, 21.9051, -15, -18.5791, 14.8038, -2.08156, 104.359, -29.2016, -10, 122, Average Score: 4.5 trial: 0, score: 5 trial: 1, score: 4 Policy 2: 27.1095, -43.5483, -19.8121, -7.24813, -55, 106.221, 8.57989, 57.354, -9.56788, -7.16121, 87.5314, 22.2983, -21.0667, -65.4055, -0.513029, -4.17145, 14.846, 9.15334, 49.8413, -64.6125, 5.48071, 122, 25.4201, -2.89656, -83.9501, -12.9828, -13.4745, -34.6933, 18.6637, 12.255, -9.94091, 7.754, 101.778, 21.3721, -50.87, 67.8681, 0.466859, -18.5726, 57.7453, 7.61779, -23.9584, 75.6215, 7.84437, 122, 0, -28.7649, 38.4428, -15, -13.3283, 130, -4.56926, 7.67173, 43.3776, 2.58033, 80.602, 34.4109, -14.5527, 38.5903, -12.8109, -18.6658, 23.3748, 2.05496, 96.5049, -66.191, -4.52591, 122, Average Score: 4.5 trial: 0, score: 5 trial: 1, score: 6 Policy 3: 26.1856, -45.16, -18.8025, -1.0685, -55, 85.1483, 3.90029, 54.9754, -29.9592, -10, 67.9612, 14.4546, -14.9127, -55.7099, 2.64739, -3, 26.2497, 11.438, 48.4373, -55.962, 10.7982, 110.585, 24.4887, -8.90912, -88, -15, -8.30428, -45.952, 7.82318, 7.78808, -9.57285, 13.03, 85.3141, 16.3263, -37.724, 42.71, -2.39728, -19.7697, 73.5638, 1.90367, -23.8008, 81.7525, 18.4964, 108.524, 10.6067, -40.5622, 59.8453, -15, -12.6392, 89.9307, -1.68026, -8.41968, 28.3277, 6.83508, 96.2681, 24.0823, -1.44672, 18.0609, -15, -19.1491, 26.9283, -9.00547, 122, -70.4796, -1.41721, 122, Average Score: 5.5 trial: 0, score: 3 trial: 1, score: 3 Policy 4: 38.6296, -39.7794, -15.6403, -8.81293, -55, 95.2554, 6.58405, 51.7684, -30.6268, -8.23769, 80.3325, 18.111, -7.90779, -53.5078, 0.748919, -3, 13.1804, 9.14985, 57.0635, -84.4099, 16.696, 118.755, 23.168, 0, -77.0525, -12.8959, -14.8679, -41.5674, 1.66747, -9.55146, -44.6462, 15.9433, 92.9586, 27.2951, -48.1925, 63.469, -8.37898, -15.244, 58.9144, 3.86391, -22.9577, 67.0537, 13.0287, 110.63, 0, -39.0279, 35.4184, -15, -21.875, 112.426, -5.93161, 0.94216, 66.7634, 15.002, 82.333, 27.7596, -5.46418, 20.8252, -11.35, -22.6678, 0.0839482, 9.16665, 106.698, -57.6226, -10, 116.939, Average Score: 3 --------------------------------- New Iteration Current Best Policy: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Current Best Policy Score: 7 Policy 0 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Policy 1 will be: 39.4119, -44.1555, -13.6454, -6.32841, -53.0421, 90.6143, 14.7637, 58.5744, -45.0253, -10, 63.3768, 26.5241, -15.4129, -78.7366, -1.4247, -3, 27.2107, 0.77798, 45.3889, -90.6551, 6.69175, 118.529, 35.86, -7.62456, -88, -10.3207, -11.3082, -35.6996, 0.99542, -10.7083, -40.4013, 16.7177, 89.943, 21.2595, -36.4281, 57.1199, -0.935867, -21.6395, 42.1762, 4.10257, -10.5332, 45.822, 14.0252, 108.974, 8.28419, -28.3788, 34.3123, -13.94, -14.4334, 110.805, -3.08792, -4.14082, 47.1027, 13.2152, 104.505, 26.703, -3.64451, 36.8989, -11.1876, -15.7999, 3.60333, 1.71835, 97.9673, -55.3563, -1.98977, 119.648, Policy 2 will be: 37.8834, -36.5717, 2.53394, -7.00557, -49.8563, 63.6425, 7.4565, 67.1016, -23.6834, -0.581142, 65.3348, 19.1879, -14.3491, -67.4256, 1.22403, -5.28732, -16.4556, 8.98351, 58.3517, -75.2106, 10.4262, 112.774, 24.2221, 0, -70.0021, -12.5342, -10.3108, -2.16946, 15.0892, 12.2488, -16.0072, 8.88103, 94.5644, 25.8376, -42.1154, 49.4257, 0.312657, -16.7258, 46.2525, 6.89514, -11.6828, 68.5206, 0.213099, 122, 5.39786, -29.2721, 27.9808, -15, -15.3152, 89.5026, 0.12062, 6.82133, 45.3533, 12.0142, 89.9696, 41.6255, -1.0275, 24.5223, -12.694, -16.505, -0.19947, -2.16975, 122, -36.8561, 3.65837, 115.554, Policy 3 will be: 28.8808, -42.4281, -19.8195, -2.82935, -53.2809, 59.7826, 2.02097, 74.1484, -5.5862, -7.24765, 85.261, 17.101, -13.1968, -72.935, -2.32068, -3, -15.718, 1.06089, 37.8579, -56.1929, 17.3566, 122, 30.1005, -9.73181, -88, -9.03276, -8.01371, -42.1173, 8.21577, -16.0356, -22.3129, 6.09817, 102.117, 18.1098, -39.611, 40.9567, 0.0695635, -15.6056, 48.8852, 1.37496, -3.1538, 83.6781, 3.28328, 122, 14.4964, -31.8436, 41.6552, -12.6971, -15.5075, 130, -8.97919, -12.8056, 36.1413, 4.92195, 95.9135, 27.9232, -4.10597, 9.76427, -9.67695, -13.8202, -2.41018, -7.56492, 107.829, -56.5819, -5.27083, 107.356, Policy 4 will be: 35.1646, -46.3211, -27.3888, -6.46577, -47.787, 71.7058, 9.83745, 57.9734, -34.9647, -2.27865, 72.1559, 13.8091, -17.8809, -83.0957, -1.98362, -3, -15.6161, 1.51179, 35.936, -81.6804, 15.5014, 111.2, 19.7878, -9.36516, -77.6079, -12.3448, -14.9708, -46.1699, 14.3753, -8.98411, -24.2308, 6.4519, 101.09, 10.0913, -39.2015, 45.5237, 1.26388, -17.0268, 38.2771, 18.3995, -14.4871, 45.904, 6.75887, 112.722, 16.5844, -27.8396, 55.9627, -15, -12.5293, 130, 3.45305, -13.8184, 25.9538, 9.53183, 83.5364, 24.8823, -7.39015, 18.1264, -14.7712, -11.9533, 14.3446, -5.32003, 101.615, -70.9895, -3.69479, 117.113, trial: 0, score: 6 trial: 1, score: 7 Policy 0: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 6.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 39.4119, -44.1555, -13.6454, -6.32841, -53.0421, 90.6143, 14.7637, 58.5744, -45.0253, -10, 63.3768, 26.5241, -15.4129, -78.7366, -1.4247, -3, 27.2107, 0.77798, 45.3889, -90.6551, 6.69175, 118.529, 35.86, -7.62456, -88, -10.3207, -11.3082, -35.6996, 0.99542, -10.7083, -40.4013, 16.7177, 89.943, 21.2595, -36.4281, 57.1199, -0.935867, -21.6395, 42.1762, 4.10257, -10.5332, 45.822, 14.0252, 108.974, 8.28419, -28.3788, 34.3123, -13.94, -14.4334, 110.805, -3.08792, -4.14082, 47.1027, 13.2152, 104.505, 26.703, -3.64451, 36.8989, -11.1876, -15.7999, 3.60333, 1.71835, 97.9673, -55.3563, -1.98977, 119.648, Average Score: 0 trial: 0, score: 6 trial: 1, score: 5 Policy 2: 37.8834, -36.5717, 2.53394, -7.00557, -49.8563, 63.6425, 7.4565, 67.1016, -23.6834, -0.581142, 65.3348, 19.1879, -14.3491, -67.4256, 1.22403, -5.28732, -16.4556, 8.98351, 58.3517, -75.2106, 10.4262, 112.774, 24.2221, 0, -70.0021, -12.5342, -10.3108, -2.16946, 15.0892, 12.2488, -16.0072, 8.88103, 94.5644, 25.8376, -42.1154, 49.4257, 0.312657, -16.7258, 46.2525, 6.89514, -11.6828, 68.5206, 0.213099, 122, 5.39786, -29.2721, 27.9808, -15, -15.3152, 89.5026, 0.12062, 6.82133, 45.3533, 12.0142, 89.9696, 41.6255, -1.0275, 24.5223, -12.694, -16.505, -0.19947, -2.16975, 122, -36.8561, 3.65837, 115.554, Average Score: 5.5 trial: 0, score: 5 trial: 1, score: 4 Policy 3: 28.8808, -42.4281, -19.8195, -2.82935, -53.2809, 59.7826, 2.02097, 74.1484, -5.5862, -7.24765, 85.261, 17.101, -13.1968, -72.935, -2.32068, -3, -15.718, 1.06089, 37.8579, -56.1929, 17.3566, 122, 30.1005, -9.73181, -88, -9.03276, -8.01371, -42.1173, 8.21577, -16.0356, -22.3129, 6.09817, 102.117, 18.1098, -39.611, 40.9567, 0.0695635, -15.6056, 48.8852, 1.37496, -3.1538, 83.6781, 3.28328, 122, 14.4964, -31.8436, 41.6552, -12.6971, -15.5075, 130, -8.97919, -12.8056, 36.1413, 4.92195, 95.9135, 27.9232, -4.10597, 9.76427, -9.67695, -13.8202, -2.41018, -7.56492, 107.829, -56.5819, -5.27083, 107.356, Average Score: 4.5 trial: 0, score: 4 trial: 1, score: 5 Policy 4: 35.1646, -46.3211, -27.3888, -6.46577, -47.787, 71.7058, 9.83745, 57.9734, -34.9647, -2.27865, 72.1559, 13.8091, -17.8809, -83.0957, -1.98362, -3, -15.6161, 1.51179, 35.936, -81.6804, 15.5014, 111.2, 19.7878, -9.36516, -77.6079, -12.3448, -14.9708, -46.1699, 14.3753, -8.98411, -24.2308, 6.4519, 101.09, 10.0913, -39.2015, 45.5237, 1.26388, -17.0268, 38.2771, 18.3995, -14.4871, 45.904, 6.75887, 112.722, 16.5844, -27.8396, 55.9627, -15, -12.5293, 130, 3.45305, -13.8184, 25.9538, 9.53183, 83.5364, 24.8823, -7.39015, 18.1264, -14.7712, -11.9533, 14.3446, -5.32003, 101.615, -70.9895, -3.69479, 117.113, Average Score: 4.5 --------------------------------- New Iteration Current Best Policy: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Current Best Policy Score: 6.5 Policy 0 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Policy 1 will be: 29.6899, -34.2583, -22.0907, -2.10155, -46.8222, 97.5091, 11.8706, 72.6707, -22.0835, -0.560185, 65.7325, 15.2528, -19.8606, -57.9388, 6.41082, -3, 6.66142, 12.1798, 41.4714, -56.6963, 8.20424, 122, 25.8469, -6.36818, -70.7995, -15, -10.9139, -11.9339, 4.68464, 12.0934, -48.6726, 12.5613, 93.4793, 21.5672, -47.2557, 61.7314, -3.44221, -19.6598, 67.7299, 12.3191, 0.396753, 78.3527, 2.90622, 122, 17.2651, -38.4106, 34.5794, -15, -21.0125, 113.499, -9.57484, 11.419, 69.4498, 4.95775, 84.0945, 25.9925, -11.8099, 26.138, -13.5451, -19.4011, -0.20614, 3.4095, 122, -32.2809, -0.477428, 111.32, Policy 2 will be: 35.3776, -43.2552, -12.7781, -8.7148, -55, 77.9143, 13.4437, 69.4752, -26.4423, -6.52267, 68.7977, 28.5111, -8.2917, -61.2133, 5.24049, -7.99851, 28.685, 6.78616, 32.4997, -96.7118, 8.64165, 112.64, 23.0017, -4.90966, -88, -10.623, -8.32363, -46.9732, 4.34342, 10.8825, -41.8653, 15.6899, 101.7, 15.6958, -42.6468, 53.7773, -0.789714, -21.3489, 38.2624, 12.2183, -0.720403, 56.1078, 11.0695, 122, 10.9316, -37.2614, 30.0355, -15, -21.0228, 130, -8.79543, 8.18159, 43.0263, 19.6362, 102.936, 37.2033, -4.52047, 33.6999, -15, -12.3252, 28.4148, 2.36271, 101.256, -33.9735, -9.84297, 120.529, Policy 3 will be: 27.0683, -38.7216, -10.8616, -8.09903, -55, 73.3847, -0.290763, 55.2057, -27.7924, -4.13924, 82.1594, 28.1296, -17.953, -78.7448, 5.48246, -6.37778, -17.1592, 5.20746, 53.4998, -77.7767, 14.7401, 110.309, 21.6026, -3.31555, -71.6902, -13.7427, -8.76628, -0.766072, 15.0728, 1.85332, -28.2948, 20.3276, 102.3, 11.5238, -44.4593, 52.1287, -5.25343, -24.1193, 51.2572, 11.8406, -0.744998, 88.8961, 1.40338, 122, 3.35802, -36.5751, 33.2422, -12.8869, -13.9677, 112.682, 7.82621, 11.2157, 48.2701, 15.1788, 93.6322, 42.2203, -4.42822, 10.5134, -13.7284, -12.2091, 13.9613, -7.7138, 97.5451, -37.2427, -3.11005, 110.133, Policy 4 will be: 25.5106, -47.5272, -27.7716, -6.00783, -49.8153, 104.689, 13.4212, 52.3275, -51.1196, -9.58432, 68.5358, 28.6476, -9.28334, -67.514, -1.64067, -7.04712, 2.11884, 7.46894, 47.5377, -69.3066, 9.18157, 115.49, 17.6636, -8.9109, -83.8408, -7.31085, -8.43873, -35.1013, 8.10007, 9.56267, -26.4745, 1.76352, 82.9626, 9.15056, -45.1197, 50.274, -9.48762, -15.5907, 64.0763, 0.463611, -15.3075, 66.547, 12.4251, 122, 5.0722, -31.1143, 61.8286, -14.0704, -21.5488, 96.7276, 4.78028, -14.7568, 68.1208, 6.06969, 100.592, 32.4983, -8.18031, 7.37388, -10.0603, -16.5713, -1.02681, -3.4575, 105.388, -59.1307, 1.60912, 122, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 6 trial: 0, score: 4 trial: 1, score: 4 Policy 1: 29.6899, -34.2583, -22.0907, -2.10155, -46.8222, 97.5091, 11.8706, 72.6707, -22.0835, -0.560185, 65.7325, 15.2528, -19.8606, -57.9388, 6.41082, -3, 6.66142, 12.1798, 41.4714, -56.6963, 8.20424, 122, 25.8469, -6.36818, -70.7995, -15, -10.9139, -11.9339, 4.68464, 12.0934, -48.6726, 12.5613, 93.4793, 21.5672, -47.2557, 61.7314, -3.44221, -19.6598, 67.7299, 12.3191, 0.396753, 78.3527, 2.90622, 122, 17.2651, -38.4106, 34.5794, -15, -21.0125, 113.499, -9.57484, 11.419, 69.4498, 4.95775, 84.0945, 25.9925, -11.8099, 26.138, -13.5451, -19.4011, -0.20614, 3.4095, 122, -32.2809, -0.477428, 111.32, Average Score: 4 trial: 0, score: 3 trial: 1, score: 0 Policy 2: 35.3776, -43.2552, -12.7781, -8.7148, -55, 77.9143, 13.4437, 69.4752, -26.4423, -6.52267, 68.7977, 28.5111, -8.2917, -61.2133, 5.24049, -7.99851, 28.685, 6.78616, 32.4997, -96.7118, 8.64165, 112.64, 23.0017, -4.90966, -88, -10.623, -8.32363, -46.9732, 4.34342, 10.8825, -41.8653, 15.6899, 101.7, 15.6958, -42.6468, 53.7773, -0.789714, -21.3489, 38.2624, 12.2183, -0.720403, 56.1078, 11.0695, 122, 10.9316, -37.2614, 30.0355, -15, -21.0228, 130, -8.79543, 8.18159, 43.0263, 19.6362, 102.936, 37.2033, -4.52047, 33.6999, -15, -12.3252, 28.4148, 2.36271, 101.256, -33.9735, -9.84297, 120.529, Average Score: 1.5 trial: 0, score: 0 trial: 1, score: 4 Policy 3: 27.0683, -38.7216, -10.8616, -8.09903, -55, 73.3847, -0.290763, 55.2057, -27.7924, -4.13924, 82.1594, 28.1296, -17.953, -78.7448, 5.48246, -6.37778, -17.1592, 5.20746, 53.4998, -77.7767, 14.7401, 110.309, 21.6026, -3.31555, -71.6902, -13.7427, -8.76628, -0.766072, 15.0728, 1.85332, -28.2948, 20.3276, 102.3, 11.5238, -44.4593, 52.1287, -5.25343, -24.1193, 51.2572, 11.8406, -0.744998, 88.8961, 1.40338, 122, 3.35802, -36.5751, 33.2422, -12.8869, -13.9677, 112.682, 7.82621, 11.2157, 48.2701, 15.1788, 93.6322, 42.2203, -4.42822, 10.5134, -13.7284, -12.2091, 13.9613, -7.7138, 97.5451, -37.2427, -3.11005, 110.133, Average Score: 2 trial: 0, score: 5 trial: 1, score: 6 Policy 4: 25.5106, -47.5272, -27.7716, -6.00783, -49.8153, 104.689, 13.4212, 52.3275, -51.1196, -9.58432, 68.5358, 28.6476, -9.28334, -67.514, -1.64067, -7.04712, 2.11884, 7.46894, 47.5377, -69.3066, 9.18157, 115.49, 17.6636, -8.9109, -83.8408, -7.31085, -8.43873, -35.1013, 8.10007, 9.56267, -26.4745, 1.76352, 82.9626, 9.15056, -45.1197, 50.274, -9.48762, -15.5907, 64.0763, 0.463611, -15.3075, 66.547, 12.4251, 122, 5.0722, -31.1143, 61.8286, -14.0704, -21.5488, 96.7276, 4.78028, -14.7568, 68.1208, 6.06969, 100.592, 32.4983, -8.18031, 7.37388, -10.0603, -16.5713, -1.02681, -3.4575, 105.388, -59.1307, 1.60912, 122, Average Score: 5.5 --------------------------------- New Iteration Current Best Policy: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Current Best Policy Score: 6 Policy 0 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Policy 1 will be: 30.5983, -36.951, 1.67416, -4.38007, -55, 97.5042, 12.8773, 53.9851, -45.6082, -10, 80.2044, 26.8464, -8.88629, -68.4203, 2.81101, -3, -4.02724, -4.0258, 48.0379, -61.4816, 13.6833, 121.032, 31.6242, -0.715441, -88, -10.4789, -15.957, -39.4324, 14.3423, 10.2567, -50.8125, 11.0956, 87.0677, 20.4487, -48.3888, 56.7909, 0.730505, -16.3643, 42.0625, 9.61222, -5.68652, 73.0314, -0.245076, 116.462, 12.619, -41.6245, 49.4056, -15, -20.5671, 118.693, 2.36672, 3.87996, 69.6258, 7.61424, 98.5158, 32.1162, -5.97417, 15.2433, -13.215, -16.8768, 2.72397, 9.44325, 121.199, -48.6814, -2.47996, 122, Policy 2 will be: 22.373, -35.7536, -18.8084, -0.942337, -51.3321, 98.3035, -0.936186, 70.2266, -17.5117, -10, 89.2843, 26.0263, -8.0887, -58.6654, 0.166296, -5.76519, -18.6329, 13.9513, 32.3301, -91.0822, 6.95414, 122, 24.0388, -2.36327, -78.2258, -15, -14.2434, -20.1786, 0.0652926, 4.64523, -44.0371, 0.842371, 105.952, 13.709, -46.4788, 55.3081, 0.795607, -14.3832, 58.2717, 17.5098, -13.3616, 76.1587, 7.61274, 121.446, 5.45365, -31.5588, 46.3382, -15, -16.6262, 130, -10, 7.57605, 46.1594, 8.20635, 85.483, 27.1692, -11.943, 6.82547, -15, -21.549, 1.61843, 8.67781, 106.128, -75.8113, -8.17171, 115.174, Policy 3 will be: 26.1903, -40.526, -23.6775, 0.651923, -48.0376, 85.5597, -0.505856, 63.889, -41.7384, -10, 81.4688, 30.5865, -11.2996, -54.4426, 5.79383, -3, 0.275948, 14.6774, 56.0884, -60.9981, 3.57199, 113.671, 32.4733, -2.68937, -88, -15, -5.98964, -6.55851, 16.0674, -6.51404, -37.7011, 10.0869, 98.5912, 20.9485, -40.5845, 39.8515, -3.27351, -20.6722, 57.0237, 6.02676, -21.6323, 75.0812, 8.33742, 122, 4.716, -38.1034, 45.0172, -12.8804, -13.7091, 129.452, -8.33381, -5.74957, 39.5138, 11.8357, 96.8002, 33.8252, -11.6004, 9.9704, -15, -13.9635, -16.9887, 5.97237, 102.02, -31.8314, -5.60249, 122, Policy 4 will be: 39.7728, -38.0708, -19.2603, -0.493513, -55, 98.8334, 3.92319, 64.0469, -18.6608, -7.60517, 70.4776, 13.6573, -18.2395, -87.8191, 1.02848, -3, 17.6666, 7.55951, 48.9654, -76.187, 8.00627, 115.521, 21.1299, -5.22466, -88, -12.9851, -16.1142, -37.7536, 10.3628, -13.6283, -21.8035, 15.3988, 99.0608, 11.7591, -49.8852, 60.8785, -2.6047, -14.0837, 72.7815, 9.69556, -22.8821, 71.5772, 7.19846, 121.167, 0, -28.2303, 38.7689, -15, -15.2098, 110.327, -5.50564, -9.57955, 35.6636, 3.31913, 105.141, 29.6061, -12.9428, 16.8278, -12.2317, -20.2135, 2.73865, -3.17274, 111.709, -66.9183, -2.93654, 113.304, trial: 0, score: 7 trial: 1, score: 6 Policy 0: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 6.5 trial: 0, score: 6 trial: 1, score: 6 Policy 1: 30.5983, -36.951, 1.67416, -4.38007, -55, 97.5042, 12.8773, 53.9851, -45.6082, -10, 80.2044, 26.8464, -8.88629, -68.4203, 2.81101, -3, -4.02724, -4.0258, 48.0379, -61.4816, 13.6833, 121.032, 31.6242, -0.715441, -88, -10.4789, -15.957, -39.4324, 14.3423, 10.2567, -50.8125, 11.0956, 87.0677, 20.4487, -48.3888, 56.7909, 0.730505, -16.3643, 42.0625, 9.61222, -5.68652, 73.0314, -0.245076, 116.462, 12.619, -41.6245, 49.4056, -15, -20.5671, 118.693, 2.36672, 3.87996, 69.6258, 7.61424, 98.5158, 32.1162, -5.97417, 15.2433, -13.215, -16.8768, 2.72397, 9.44325, 121.199, -48.6814, -2.47996, 122, Average Score: 6 trial: 0, score: 3 trial: 1, score: 3 Policy 2: 22.373, -35.7536, -18.8084, -0.942337, -51.3321, 98.3035, -0.936186, 70.2266, -17.5117, -10, 89.2843, 26.0263, -8.0887, -58.6654, 0.166296, -5.76519, -18.6329, 13.9513, 32.3301, -91.0822, 6.95414, 122, 24.0388, -2.36327, -78.2258, -15, -14.2434, -20.1786, 0.0652926, 4.64523, -44.0371, 0.842371, 105.952, 13.709, -46.4788, 55.3081, 0.795607, -14.3832, 58.2717, 17.5098, -13.3616, 76.1587, 7.61274, 121.446, 5.45365, -31.5588, 46.3382, -15, -16.6262, 130, -10, 7.57605, 46.1594, 8.20635, 85.483, 27.1692, -11.943, 6.82547, -15, -21.549, 1.61843, 8.67781, 106.128, -75.8113, -8.17171, 115.174, Average Score: 3 trial: 0, score: 5 trial: 1, score: 6 Policy 3: 26.1903, -40.526, -23.6775, 0.651923, -48.0376, 85.5597, -0.505856, 63.889, -41.7384, -10, 81.4688, 30.5865, -11.2996, -54.4426, 5.79383, -3, 0.275948, 14.6774, 56.0884, -60.9981, 3.57199, 113.671, 32.4733, -2.68937, -88, -15, -5.98964, -6.55851, 16.0674, -6.51404, -37.7011, 10.0869, 98.5912, 20.9485, -40.5845, 39.8515, -3.27351, -20.6722, 57.0237, 6.02676, -21.6323, 75.0812, 8.33742, 122, 4.716, -38.1034, 45.0172, -12.8804, -13.7091, 129.452, -8.33381, -5.74957, 39.5138, 11.8357, 96.8002, 33.8252, -11.6004, 9.9704, -15, -13.9635, -16.9887, 5.97237, 102.02, -31.8314, -5.60249, 122, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 39.7728, -38.0708, -19.2603, -0.493513, -55, 98.8334, 3.92319, 64.0469, -18.6608, -7.60517, 70.4776, 13.6573, -18.2395, -87.8191, 1.02848, -3, 17.6666, 7.55951, 48.9654, -76.187, 8.00627, 115.521, 21.1299, -5.22466, -88, -12.9851, -16.1142, -37.7536, 10.3628, -13.6283, -21.8035, 15.3988, 99.0608, 11.7591, -49.8852, 60.8785, -2.6047, -14.0837, 72.7815, 9.69556, -22.8821, 71.5772, 7.19846, 121.167, 0, -28.2303, 38.7689, -15, -15.2098, 110.327, -5.50564, -9.57955, 35.6636, 3.31913, 105.141, 29.6061, -12.9428, 16.8278, -12.2317, -20.2135, 2.73865, -3.17274, 111.709, -66.9183, -2.93654, 113.304, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Current Best Policy Score: 6.5 Policy 0 will be: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Policy 1 will be: 33.8154, -33.8341, -7.85691, -3.55777, -55, 74.6988, -1.89552, 64.06, -45.1658, -10, 73.4354, 23.8916, -11.2716, -78.742, 1.97321, -3, -4.85312, 3.78632, 32.5059, -82.0325, 17.1934, 122, 28.5255, -4.04669, -88, -12.639, -10.4194, -32.3238, 4.56982, 9.19688, -33.2005, 4.58645, 103.761, 19.6713, -42.2056, 68.0297, -1.0255, -21.2959, 73.3207, 15.1027, -25, 75.9185, 16.2133, 122, 9.23424, -35.1106, 40.4413, -11.742, -19.8527, 119.002, -4.51017, 11.1615, 27.8677, 16.6512, 105.845, 42.9272, -2.78655, 40.2858, -15, -13.0829, 16.1133, 5.41427, 115.735, -51.4467, -1.07541, 122, Policy 2 will be: 32.6447, -43.663, -4.35496, -4.05147, -48.0798, 91.6032, 10.5898, 56.1325, -44.8846, -5.06214, 87.8939, 13.5368, -20.2913, -57.7731, 7.15379, -3, -0.874947, -0.757598, 55.306, -64.2425, 9.71624, 112.121, 30.6763, -2.77008, -88, -14.4312, -10.5713, -5.86409, 7.51582, -15.7743, -32.9158, 14.6501, 99.072, 21.6523, -44.7028, 65.7819, 1.05726, -19.2359, 54.4213, 4.19499, -10.4525, 56.7327, 1.89013, 122, 16.8956, -34.0433, 43.7928, -12.6815, -19.2362, 88.7483, 3.72019, 1.46963, 33.635, 12.5782, 98.3271, 30.6317, -4.6167, 36.6692, -11.2373, -15.0371, 1.81454, -7.4452, 110.128, -39.2753, -4.61769, 121.245, Policy 3 will be: 27.7082, -39.6331, -29.3795, -5.73801, -48.6564, 100.014, 1.65991, 72.1259, -50.4214, -3.25748, 81.0001, 25.4148, -16.8254, -87.9405, -2.63051, -3.30516, -10.0498, 1.67289, 45.0563, -55.403, 6.7895, 122, 25.3639, -8.1093, -85.637, -8.74057, -9.89659, -13.9875, 8.96839, 4.77235, -33.6455, 17.7279, 109.078, 12.3044, -48.298, 63.3601, -10.0999, -19.4383, 57.0053, 4.58942, 0.338687, 75.6183, -0.0803854, 115.198, 0, -34.0036, 40.0798, -10.6177, -14.4834, 90.6543, 5.03675, 7.29343, 52.7172, 13.9641, 85.2266, 40.9689, -0.63387, 24.495, -13.2647, -19.0951, 0.745743, -8.58057, 110.635, -69.1689, -0.405488, 110.017, Policy 4 will be: 29.5541, -48.0254, -30.7493, -0.675692, -54.3792, 101.433, 14.6408, 74.1861, -27.0229, -10, 66.9736, 28.544, -15.5052, -71.9375, 4.78748, -4.06982, 23.9561, -3.79295, 55.047, -93.0692, 0.606248, 116.316, 20.6857, -0.502233, -88, -6.22582, -13.326, -35.5941, 1.10544, 0.378669, -42.3079, 13.7711, 106.202, 8.42623, -39.0182, 70.3627, -3.67152, -19.5588, 72.1332, 7.55143, -22.7597, 71.455, 18.3853, 118.074, 2.32828, -28.3271, 40.8329, -15, -14.4949, 97.9031, -10, 8.91513, 38.9805, 18.9756, 102.372, 23.8985, -5.85262, 39.9209, -15, -16.1642, 21.1028, -8.00015, 108.196, -48.1569, -0.103223, 122, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 31.5809, -40.8559, -13.8331, -3.50416, -52.2256, 81.8825, 7.35778, 63.5, -27.7992, -10, 75.9669, 23.4913, -15.0757, -70.8296, 1.54188, -3, 5.29004, 4.88923, 46.5436, -79.1872, 8.55768, 122, 27.1776, -2.56504, -87.1426, -10.5721, -10.364, -22.8109, 9.37374, -2.0655, -31.9416, 10.556, 94.7226, 18.0048, -43.4413, 53.0357, -4.57532, -18.4698, 61.6512, 9.1035, -13.1202, 65.2692, 8.88355, 122, 7.48799, -34.3103, 44.6574, -15, -16.2503, 112.447, -1.42204, -0.287637, 49.2174, 11.8266, 91.4166, 32.9484, -8.01429, 22.7909, -15, -17.6023, 6.72264, -0.174547, 111.157, -52.6909, -4.71534, 121.341, Average Score: 6 trial: 0, score: 8 trial: 1, score: 5 Policy 1: 33.8154, -33.8341, -7.85691, -3.55777, -55, 74.6988, -1.89552, 64.06, -45.1658, -10, 73.4354, 23.8916, -11.2716, -78.742, 1.97321, -3, -4.85312, 3.78632, 32.5059, -82.0325, 17.1934, 122, 28.5255, -4.04669, -88, -12.639, -10.4194, -32.3238, 4.56982, 9.19688, -33.2005, 4.58645, 103.761, 19.6713, -42.2056, 68.0297, -1.0255, -21.2959, 73.3207, 15.1027, -25, 75.9185, 16.2133, 122, 9.23424, -35.1106, 40.4413, -11.742, -19.8527, 119.002, -4.51017, 11.1615, 27.8677, 16.6512, 105.845, 42.9272, -2.78655, 40.2858, -15, -13.0829, 16.1133, 5.41427, 115.735, -51.4467, -1.07541, 122, Average Score: 6.5 trial: 0, score: 5 trial: 1, score: 5 Policy 2: 32.6447, -43.663, -4.35496, -4.05147, -48.0798, 91.6032, 10.5898, 56.1325, -44.8846, -5.06214, 87.8939, 13.5368, -20.2913, -57.7731, 7.15379, -3, -0.874947, -0.757598, 55.306, -64.2425, 9.71624, 112.121, 30.6763, -2.77008, -88, -14.4312, -10.5713, -5.86409, 7.51582, -15.7743, -32.9158, 14.6501, 99.072, 21.6523, -44.7028, 65.7819, 1.05726, -19.2359, 54.4213, 4.19499, -10.4525, 56.7327, 1.89013, 122, 16.8956, -34.0433, 43.7928, -12.6815, -19.2362, 88.7483, 3.72019, 1.46963, 33.635, 12.5782, 98.3271, 30.6317, -4.6167, 36.6692, -11.2373, -15.0371, 1.81454, -7.4452, 110.128, -39.2753, -4.61769, 121.245, Average Score: 5 trial: 0, score: 3 trial: 1, score: 3 Policy 3: 27.7082, -39.6331, -29.3795, -5.73801, -48.6564, 100.014, 1.65991, 72.1259, -50.4214, -3.25748, 81.0001, 25.4148, -16.8254, -87.9405, -2.63051, -3.30516, -10.0498, 1.67289, 45.0563, -55.403, 6.7895, 122, 25.3639, -8.1093, -85.637, -8.74057, -9.89659, -13.9875, 8.96839, 4.77235, -33.6455, 17.7279, 109.078, 12.3044, -48.298, 63.3601, -10.0999, -19.4383, 57.0053, 4.58942, 0.338687, 75.6183, -0.0803854, 115.198, 0, -34.0036, 40.0798, -10.6177, -14.4834, 90.6543, 5.03675, 7.29343, 52.7172, 13.9641, 85.2266, 40.9689, -0.63387, 24.495, -13.2647, -19.0951, 0.745743, -8.58057, 110.635, -69.1689, -0.405488, 110.017, Average Score: 3 trial: 0, score: 3 trial: 1, score: 0 Policy 4: 29.5541, -48.0254, -30.7493, -0.675692, -54.3792, 101.433, 14.6408, 74.1861, -27.0229, -10, 66.9736, 28.544, -15.5052, -71.9375, 4.78748, -4.06982, 23.9561, -3.79295, 55.047, -93.0692, 0.606248, 116.316, 20.6857, -0.502233, -88, -6.22582, -13.326, -35.5941, 1.10544, 0.378669, -42.3079, 13.7711, 106.202, 8.42623, -39.0182, 70.3627, -3.67152, -19.5588, 72.1332, 7.55143, -22.7597, 71.455, 18.3853, 118.074, 2.32828, -28.3271, 40.8329, -15, -14.4949, 97.9031, -10, 8.91513, 38.9805, 18.9756, 102.372, 23.8985, -5.85262, 39.9209, -15, -16.1642, 21.1028, -8.00015, 108.196, -48.1569, -0.103223, 122, Average Score: 1.5 --------------------------------- New Iteration Current Best Policy: 33.8154, -33.8341, -7.85691, -3.55777, -55, 74.6988, -1.89552, 64.06, -45.1658, -10, 73.4354, 23.8916, -11.2716, -78.742, 1.97321, -3, -4.85312, 3.78632, 32.5059, -82.0325, 17.1934, 122, 28.5255, -4.04669, -88, -12.639, -10.4194, -32.3238, 4.56982, 9.19688, -33.2005, 4.58645, 103.761, 19.6713, -42.2056, 68.0297, -1.0255, -21.2959, 73.3207, 15.1027, -25, 75.9185, 16.2133, 122, 9.23424, -35.1106, 40.4413, -11.742, -19.8527, 119.002, -4.51017, 11.1615, 27.8677, 16.6512, 105.845, 42.9272, -2.78655, 40.2858, -15, -13.0829, 16.1133, 5.41427, 115.735, -51.4467, -1.07541, 122, Current Best Policy Score: 6.5 Policy 0 will be: 33.8154, -33.8341, -7.85691, -3.55777, -55, 74.6988, -1.89552, 64.06, -45.1658, -10, 73.4354, 23.8916, -11.2716, -78.742, 1.97321, -3, -4.85312, 3.78632, 32.5059, -82.0325, 17.1934, 122, 28.5255, -4.04669, -88, -12.639, -10.4194, -32.3238, 4.56982, 9.19688, -33.2005, 4.58645, 103.761, 19.6713, -42.2056, 68.0297, -1.0255, -21.2959, 73.3207, 15.1027, -25, 75.9185, 16.2133, 122, 9.23424, -35.1106, 40.4413, -11.742, -19.8527, 119.002, -4.51017, 11.1615, 27.8677, 16.6512, 105.845, 42.9272, -2.78655, 40.2858, -15, -13.0829, 16.1133, 5.41427, 115.735, -51.4467, -1.07541, 122, Policy 1 will be: 39.6239, -27.849, -6.16513, -7.03144, -55, 84.5477, -7.79387, 78.7146, -52.0066, -10, 63.6415, 23.739, -15.9008, -66.577, 7.8662, -3.49835, -7.39806, 5.74597, 18.2327, -89.1054, 22.7115, 122, 21.6987, -11.1705, -88, -8.32055, -9.87121, -22.7689, -2.19201, -0.334426, -40.2977, -3.92779, 109.946, 14.936, -38.4154, 67.957, -0.774359, -21.2402, 83.1517, 11.3514, -12.588, 62.2157, 14.2766, 122, 8.27439, -32.8828, 52.3534, -12.8946, -24.3591, 130, -6.42486, -3.06528, 21.7099, 12.2331, 109.196, 36.08, -2.27208, 51.0284, -15, -15.7065, 28.7529, 0.628, 116.604, -52.3468, 0.45139, 110.915, Policy 2 will be: 26.5555, -34.7228, -10.7225, -6.46236, -55, 81.8363, -0.637026, 77.6849, -62.2292, -10, 60.0632, 18.3013, -16.09, -79.3565, -0.0746889, -7.41429, -20.4636, -0.114844, 35.8313, -104.474, 17.5181, 122, 26.4342, -11.4975, -72.2259, -15, -6.95788, -9.84941, 0.928331, 4.84337, -22.5732, -1.97658, 94.625, 16.8892, -47.1731, 80.5289, -0.493398, -15.6224, 95.2318, 23.6764, -21.1775, 55.8781, 14.6014, 122, 6.07601, -27.9218, 38.8619, -7.40696, -22.5668, 130, -4.60142, 23.0064, 32.6239, 21.6213, 116.43, 45.1028, 0, 28.079, -15, -14.2991, 39.4106, 1.12503, 122, -41.2899, -1.56298, 122, Policy 3 will be: 27.9228, -39.5693, 6.84959, -4.11206, -55, 85.8744, 4.68838, 68.2195, -22.596, -10, 81.624, 23.3727, -15.1047, -65.2737, 5.86152, -3, -22.4231, 6.33099, 40.323, -72.0745, 26.0684, 116.165, 31.1677, -1.10759, -88, -8.14319, -6.31799, -33.5186, 11.0726, 15.3352, -40.9802, -0.923241, 107.879, 20.7936, -35.4886, 68.3935, -1.08009, -25.3138, 66.7192, 18.6314, -25, 68.2955, 17.2246, 108.96, 0.902683, -40.419, 55.8512, -15, -17.2988, 130, -7.27339, 18.1635, 39.2963, 10.1233, 94.095, 34.9673, -1.00412, 35.4924, -10.2313, -8.09716, 12.9809, 15.0613, 108.425, -45.4198, 8.24997, 118.768, Policy 4 will be: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, trial: 0, score: 5 trial: 1, score: 7 Policy 0: 33.8154, -33.8341, -7.85691, -3.55777, -55, 74.6988, -1.89552, 64.06, -45.1658, -10, 73.4354, 23.8916, -11.2716, -78.742, 1.97321, -3, -4.85312, 3.78632, 32.5059, -82.0325, 17.1934, 122, 28.5255, -4.04669, -88, -12.639, -10.4194, -32.3238, 4.56982, 9.19688, -33.2005, 4.58645, 103.761, 19.6713, -42.2056, 68.0297, -1.0255, -21.2959, 73.3207, 15.1027, -25, 75.9185, 16.2133, 122, 9.23424, -35.1106, 40.4413, -11.742, -19.8527, 119.002, -4.51017, 11.1615, 27.8677, 16.6512, 105.845, 42.9272, -2.78655, 40.2858, -15, -13.0829, 16.1133, 5.41427, 115.735, -51.4467, -1.07541, 122, Average Score: 6 trial: 0, score: 3 trial: 1, score: 5 Policy 1: 39.6239, -27.849, -6.16513, -7.03144, -55, 84.5477, -7.79387, 78.7146, -52.0066, -10, 63.6415, 23.739, -15.9008, -66.577, 7.8662, -3.49835, -7.39806, 5.74597, 18.2327, -89.1054, 22.7115, 122, 21.6987, -11.1705, -88, -8.32055, -9.87121, -22.7689, -2.19201, -0.334426, -40.2977, -3.92779, 109.946, 14.936, -38.4154, 67.957, -0.774359, -21.2402, 83.1517, 11.3514, -12.588, 62.2157, 14.2766, 122, 8.27439, -32.8828, 52.3534, -12.8946, -24.3591, 130, -6.42486, -3.06528, 21.7099, 12.2331, 109.196, 36.08, -2.27208, 51.0284, -15, -15.7065, 28.7529, 0.628, 116.604, -52.3468, 0.45139, 110.915, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 26.5555, -34.7228, -10.7225, -6.46236, -55, 81.8363, -0.637026, 77.6849, -62.2292, -10, 60.0632, 18.3013, -16.09, -79.3565, -0.0746889, -7.41429, -20.4636, -0.114844, 35.8313, -104.474, 17.5181, 122, 26.4342, -11.4975, -72.2259, -15, -6.95788, -9.84941, 0.928331, 4.84337, -22.5732, -1.97658, 94.625, 16.8892, -47.1731, 80.5289, -0.493398, -15.6224, 95.2318, 23.6764, -21.1775, 55.8781, 14.6014, 122, 6.07601, -27.9218, 38.8619, -7.40696, -22.5668, 130, -4.60142, 23.0064, 32.6239, 21.6213, 116.43, 45.1028, 0, 28.079, -15, -14.2991, 39.4106, 1.12503, 122, -41.2899, -1.56298, 122, Average Score: 0 trial: 0, score: 4 trial: 1, score: 4 Policy 3: 27.9228, -39.5693, 6.84959, -4.11206, -55, 85.8744, 4.68838, 68.2195, -22.596, -10, 81.624, 23.3727, -15.1047, -65.2737, 5.86152, -3, -22.4231, 6.33099, 40.323, -72.0745, 26.0684, 116.165, 31.1677, -1.10759, -88, -8.14319, -6.31799, -33.5186, 11.0726, 15.3352, -40.9802, -0.923241, 107.879, 20.7936, -35.4886, 68.3935, -1.08009, -25.3138, 66.7192, 18.6314, -25, 68.2955, 17.2246, 108.96, 0.902683, -40.419, 55.8512, -15, -17.2988, 130, -7.27339, 18.1635, 39.2963, 10.1233, 94.095, 34.9673, -1.00412, 35.4924, -10.2313, -8.09716, 12.9809, 15.0613, 108.425, -45.4198, 8.24997, 118.768, Average Score: 4 trial: 0, score: 6 trial: 1, score: 7 Policy 4: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Average Score: 6.5 --------------------------------- New Iteration Current Best Policy: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Current Best Policy Score: 6.5 Policy 0 will be: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Policy 1 will be: 51.577, -22.4264, 1.55219, -10.6831, -54.2868, 60.884, -10, 68.0017, -43.0204, -7.63748, 77.8003, 18.9647, -7.09554, -66.9226, 3.09132, -8.69085, -42.9855, 7.29925, 27.9576, -57.4612, 20.2965, 120.665, 20.6717, -1.72171, -88, -4.3468, -18.0022, -24.1858, 7.00443, 18.5947, -41.6832, -5.86707, 84.929, 35.7925, -43.609, 48.7842, -1.06402, -20.2236, 101.621, 12.2349, -15.8547, 75.7162, 19.1828, 122, 12.7738, -42.8326, 19.7853, -15, -19.1155, 114.308, -9.62, 14.3964, 41.4813, 23.6777, 95.0889, 31.4618, 0, 26.7859, -15, -19.9509, 27.0217, 5.5971, 122, -56.4472, 0.743613, 122, Policy 2 will be: 37.9259, -31.2622, -0.714142, -2.9992, -55, 99.6608, -2.24789, 71.1105, -54.7591, -10, 67.4171, 32.6017, -7.8338, -66.5233, 3.31306, -3, -12.0257, 7.88966, 35.0434, -82.0093, 17.7042, 107.312, 17.7728, -3.90309, -86.5374, -3.39477, -12.195, -34.1978, 0.105556, -4.68852, -28.1753, -0.409522, 99.5916, 22.7716, -52.1543, 53.5864, 0.950314, -19.9079, 77.8728, 5.78902, -11.2028, 72.2132, 13.2144, 122, 19.0226, -34.5901, 30.729, -15, -22.1401, 130, -10, -2.00423, 31.5847, 29.8993, 95.892, 29.4457, -1.77791, 31.6885, -15, -19.4577, -12.4967, 3.36381, 116.855, -58.1599, 6.5351, 122, Policy 3 will be: 39.4945, -24.6282, 8.58109, -4.67122, -51.0448, 88.5123, -6.66859, 65.1947, -32.6857, -2.1126, 61.1389, 20.9886, -8.04114, -83.6964, 10.0745, -9.5942, -4.81334, 9.27777, 37.856, -88.9946, 23.0821, 112.785, 29.723, -4.4009, -88, -5.54335, -8.91793, -31.0692, 5.59017, -1.82872, -63.9008, -1.66127, 86.6503, 26.5254, -49.1603, 55.5959, 5.87147, -15.2347, 65.0693, 14.0474, -17.1897, 90.7707, 30.5929, 114.957, 14.2384, -40.1924, 12.8416, -11.3621, -16.856, 123.039, -6.0213, 2.32352, 42.0813, 19.2259, 111.331, 39.1608, 0, 37.0235, -15, -17.5932, 11.4072, 8.11597, 122, -33.0875, -1.41241, 115.205, Policy 4 will be: 41.9134, -24.9659, 20.3131, -1.56259, -49.3234, 63.2152, -10, 65.8179, -31.545, -10, 62.4194, 23.5213, -12.4333, -76.0805, 7.72631, -3, -35.8163, 15.9503, 26.5013, -82.0309, 21.3716, 119.975, 15.6706, -4.81952, -79.3156, -15, -7.12393, -20.8129, -4.39028, -2.87326, -36.2524, 6.84313, 91.4123, 32.9857, -49.1727, 41.7492, 2.59589, -15.2046, 60.4132, 16.4907, -19.2516, 88.9493, 15.729, 122, 23.2806, -36.6816, 9.87231, -10.8684, -20.6391, 130, -1.70917, 6.12424, 25.5112, 31.9677, 100.334, 36.8083, 0, 27.6091, -15, -9.48465, 6.15786, -4.11204, 113.644, -53.4048, 0.46895, 107.721, trial: 0, score: 7 trial: 1, score: 8 Policy 0: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Average Score: 7.5 trial: 0, score: 6 trial: 1, score: 6 Policy 1: 51.577, -22.4264, 1.55219, -10.6831, -54.2868, 60.884, -10, 68.0017, -43.0204, -7.63748, 77.8003, 18.9647, -7.09554, -66.9226, 3.09132, -8.69085, -42.9855, 7.29925, 27.9576, -57.4612, 20.2965, 120.665, 20.6717, -1.72171, -88, -4.3468, -18.0022, -24.1858, 7.00443, 18.5947, -41.6832, -5.86707, 84.929, 35.7925, -43.609, 48.7842, -1.06402, -20.2236, 101.621, 12.2349, -15.8547, 75.7162, 19.1828, 122, 12.7738, -42.8326, 19.7853, -15, -19.1155, 114.308, -9.62, 14.3964, 41.4813, 23.6777, 95.0889, 31.4618, 0, 26.7859, -15, -19.9509, 27.0217, 5.5971, 122, -56.4472, 0.743613, 122, Average Score: 6 trial: 0, score: 2 trial: 1, score: 1 Policy 2: 37.9259, -31.2622, -0.714142, -2.9992, -55, 99.6608, -2.24789, 71.1105, -54.7591, -10, 67.4171, 32.6017, -7.8338, -66.5233, 3.31306, -3, -12.0257, 7.88966, 35.0434, -82.0093, 17.7042, 107.312, 17.7728, -3.90309, -86.5374, -3.39477, -12.195, -34.1978, 0.105556, -4.68852, -28.1753, -0.409522, 99.5916, 22.7716, -52.1543, 53.5864, 0.950314, -19.9079, 77.8728, 5.78902, -11.2028, 72.2132, 13.2144, 122, 19.0226, -34.5901, 30.729, -15, -22.1401, 130, -10, -2.00423, 31.5847, 29.8993, 95.892, 29.4457, -1.77791, 31.6885, -15, -19.4577, -12.4967, 3.36381, 116.855, -58.1599, 6.5351, 122, Average Score: 1.5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 39.4945, -24.6282, 8.58109, -4.67122, -51.0448, 88.5123, -6.66859, 65.1947, -32.6857, -2.1126, 61.1389, 20.9886, -8.04114, -83.6964, 10.0745, -9.5942, -4.81334, 9.27777, 37.856, -88.9946, 23.0821, 112.785, 29.723, -4.4009, -88, -5.54335, -8.91793, -31.0692, 5.59017, -1.82872, -63.9008, -1.66127, 86.6503, 26.5254, -49.1603, 55.5959, 5.87147, -15.2347, 65.0693, 14.0474, -17.1897, 90.7707, 30.5929, 114.957, 14.2384, -40.1924, 12.8416, -11.3621, -16.856, 123.039, -6.0213, 2.32352, 42.0813, 19.2259, 111.331, 39.1608, 0, 37.0235, -15, -17.5932, 11.4072, 8.11597, 122, -33.0875, -1.41241, 115.205, Average Score: 0 trial: 0, score: 0 trial: 1, score: 9 Policy 4: 41.9134, -24.9659, 20.3131, -1.56259, -49.3234, 63.2152, -10, 65.8179, -31.545, -10, 62.4194, 23.5213, -12.4333, -76.0805, 7.72631, -3, -35.8163, 15.9503, 26.5013, -82.0309, 21.3716, 119.975, 15.6706, -4.81952, -79.3156, -15, -7.12393, -20.8129, -4.39028, -2.87326, -36.2524, 6.84313, 91.4123, 32.9857, -49.1727, 41.7492, 2.59589, -15.2046, 60.4132, 16.4907, -19.2516, 88.9493, 15.729, 122, 23.2806, -36.6816, 9.87231, -10.8684, -20.6391, 130, -1.70917, 6.12424, 25.5112, 31.9677, 100.334, 36.8083, 0, 27.6091, -15, -9.48465, 6.15786, -4.11204, 113.644, -53.4048, 0.46895, 107.721, Average Score: 4.5 --------------------------------- New Iteration Current Best Policy: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Current Best Policy Score: 7.5 Policy 0 will be: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Policy 1 will be: 40.9546, -33.1661, 9.07715, -4.00101, -55, 90.9877, 4.29962, 63.8503, -53.32, -8.24176, 52.4837, 22.7169, 0, -52.3259, 11.1094, -5.54599, -12.3082, 0.708028, 35.1442, -62.2456, 24.5756, 109.3, 15.8422, 0, -88, -10.8201, -17.7734, 7.60066, 8.59501, 12.3175, -31.5268, 3.99376, 94.5488, 17.0982, -40.0177, 67.0807, 3.87052, -15.255, 85.6999, 13.8773, -25, 82.5961, 17.7634, 122, 22.1596, -31.4201, 41.7603, -14.6377, -15.9321, 110.208, 0.208374, 3.43908, 18.1256, 23.441, 118.325, 26.4092, -0.891925, 23.8306, -10.5779, -13.0461, 23.3311, 0.794873, 108.184, -65.1713, -3.95354, 121.686, Policy 2 will be: 37.6756, -29.1415, 8.55391, 0.936655, -55, 72.7369, 3.45856, 78.406, -40.9594, -10, 60.851, 19.5284, -3.06946, -74.0061, 7.75257, -6.34663, -43.229, 3.47765, 32.7386, -84.0845, 29.136, 111.522, 23.6272, -2.80913, -77.8241, -9.38518, -9.19346, -3.57443, -1.86826, -9.55291, -33.6902, 2.80581, 97.433, 26.9324, -47.8828, 40.6632, 4.68537, -23.4737, 71.1265, 14.014, -25, 71.6422, 20.1059, 122, 19.1531, -31.8467, 20.709, -10.0348, -17.656, 128.28, -9.25351, 23.1799, 33.5153, 28.3258, 116.979, 26.692, 0, 43.8045, -13.1692, -11.2555, 4.03071, 11.9453, 122, -25.7844, 13.3534, 122, Policy 3 will be: 40.089, -31.8338, 12.8736, -3.70953, -54.9053, 92.3232, 0.897533, 56.0765, -34.5714, -10, 76.5941, 14.4642, -11.3908, -60.0016, 8.5591, -11.3387, -31.8517, 13.2485, 23.7422, -80.5893, 16.5644, 119.64, 21.9134, -1.14048, -79.1092, -5.16414, -18.1101, -27.8798, -5.40487, 3.09439, -35.6176, 7.3684, 91.6657, 29.1059, -38.6588, 53.447, 4.31816, -18.7636, 99.0075, 8.89956, -23.9133, 84.966, 21.7697, 122, 25.5358, -44.6728, 23.2125, -12.4803, -22.8665, 130, 1.87289, 8.54556, 43.6305, 19.0068, 114.94, 37.8019, 0, 54.1229, -15, -11.1918, 11.0943, 0.743065, 122, -53.8435, -3.94015, 122, Policy 4 will be: 39.3656, -25.1267, 19.7876, -9.99216, -55, 98.1094, -5.659, 60.9252, -35.4013, -10, 53.4983, 26.6204, 0, -82.2068, 5.19817, -3, -13.7676, 9.88652, 48.6124, -63.4487, 35.1639, 119.314, 26.2016, -2.27127, -75.7762, -5.0705, -10.8277, 0.794246, 2.84762, 0.313908, -68.1384, -8.79105, 89.6249, 34.8941, -49.5012, 69.164, 9.35544, -17.7594, 61.5347, 19.6342, -15.7742, 109.745, 15.3992, 110.975, 26.6951, -38.9168, 32.9317, -15, -16.0856, 107.985, 2.38574, 14.8423, 28.5118, 24.2345, 114.989, 42.9275, 0, 34.836, -12.0293, -14.9314, -9.02789, 4.45979, 122, -22.7704, 11.6007, 122, trial: 0, score: 7 trial: 1, score: 6 Policy 0: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Average Score: 6.5 trial: 0, score: 3 trial: 1, score: 5 Policy 1: 40.9546, -33.1661, 9.07715, -4.00101, -55, 90.9877, 4.29962, 63.8503, -53.32, -8.24176, 52.4837, 22.7169, 0, -52.3259, 11.1094, -5.54599, -12.3082, 0.708028, 35.1442, -62.2456, 24.5756, 109.3, 15.8422, 0, -88, -10.8201, -17.7734, 7.60066, 8.59501, 12.3175, -31.5268, 3.99376, 94.5488, 17.0982, -40.0177, 67.0807, 3.87052, -15.255, 85.6999, 13.8773, -25, 82.5961, 17.7634, 122, 22.1596, -31.4201, 41.7603, -14.6377, -15.9321, 110.208, 0.208374, 3.43908, 18.1256, 23.441, 118.325, 26.4092, -0.891925, 23.8306, -10.5779, -13.0461, 23.3311, 0.794873, 108.184, -65.1713, -3.95354, 121.686, Average Score: 4 trial: 0, score: 5 trial: 1, score: 0 Policy 2: 37.6756, -29.1415, 8.55391, 0.936655, -55, 72.7369, 3.45856, 78.406, -40.9594, -10, 60.851, 19.5284, -3.06946, -74.0061, 7.75257, -6.34663, -43.229, 3.47765, 32.7386, -84.0845, 29.136, 111.522, 23.6272, -2.80913, -77.8241, -9.38518, -9.19346, -3.57443, -1.86826, -9.55291, -33.6902, 2.80581, 97.433, 26.9324, -47.8828, 40.6632, 4.68537, -23.4737, 71.1265, 14.014, -25, 71.6422, 20.1059, 122, 19.1531, -31.8467, 20.709, -10.0348, -17.656, 128.28, -9.25351, 23.1799, 33.5153, 28.3258, 116.979, 26.692, 0, 43.8045, -13.1692, -11.2555, 4.03071, 11.9453, 122, -25.7844, 13.3534, 122, Average Score: 2.5 trial: 0, score: 4 trial: 1, score: 4 Policy 3: 40.089, -31.8338, 12.8736, -3.70953, -54.9053, 92.3232, 0.897533, 56.0765, -34.5714, -10, 76.5941, 14.4642, -11.3908, -60.0016, 8.5591, -11.3387, -31.8517, 13.2485, 23.7422, -80.5893, 16.5644, 119.64, 21.9134, -1.14048, -79.1092, -5.16414, -18.1101, -27.8798, -5.40487, 3.09439, -35.6176, 7.3684, 91.6657, 29.1059, -38.6588, 53.447, 4.31816, -18.7636, 99.0075, 8.89956, -23.9133, 84.966, 21.7697, 122, 25.5358, -44.6728, 23.2125, -12.4803, -22.8665, 130, 1.87289, 8.54556, 43.6305, 19.0068, 114.94, 37.8019, 0, 54.1229, -15, -11.1918, 11.0943, 0.743065, 122, -53.8435, -3.94015, 122, Average Score: 4 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 39.3656, -25.1267, 19.7876, -9.99216, -55, 98.1094, -5.659, 60.9252, -35.4013, -10, 53.4983, 26.6204, 0, -82.2068, 5.19817, -3, -13.7676, 9.88652, 48.6124, -63.4487, 35.1639, 119.314, 26.2016, -2.27127, -75.7762, -5.0705, -10.8277, 0.794246, 2.84762, 0.313908, -68.1384, -8.79105, 89.6249, 34.8941, -49.5012, 69.164, 9.35544, -17.7594, 61.5347, 19.6342, -15.7742, 109.745, 15.3992, 110.975, 26.6951, -38.9168, 32.9317, -15, -16.0856, 107.985, 2.38574, 14.8423, 28.5118, 24.2345, 114.989, 42.9275, 0, 34.836, -12.0293, -14.9314, -9.02789, 4.45979, 122, -22.7704, 11.6007, 122, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Current Best Policy Score: 6.5 Policy 0 will be: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Policy 1 will be: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Policy 2 will be: 49.1481, -21.9165, -5.10776, -1.94991, -49.6992, 54.0321, -10, 53.5127, -45.2369, -10, 72.0688, 16.9029, -0.0377679, -55.6233, 7.58898, -4.75297, -46.2234, 4.58343, 39.4347, -66.6664, 18.2987, 115.492, 23.1184, -10.2619, -88, -6.39134, -15.5214, 7.05868, -5.59934, 17.4519, -51.4668, 4.82769, 78.8338, 34.9483, -40.1774, 66.5008, 1.84888, -24.8635, 58.6057, 14.8647, -25, 77.839, 21.952, 122, 11.517, -35.4814, 22.0545, -10.9121, -20.4412, 130, -10, 7.44536, 19.8789, 27.4588, 94.6676, 38.5271, -6.11495, 31.8267, -15, -16.36, -5.74426, 5.36673, 122, -55.6763, -2.59677, 120.054, Policy 3 will be: 35.5816, -22.9209, -0.339179, -0.938595, -50.3652, 68.0326, 1.02429, 63.0573, -57.408, -5.04264, 69.7384, 30.6368, -2.24848, -84.8367, 6.08406, -7.27734, -38.9571, -0.28327, 44.6195, -91.7926, 29.4257, 122, 24.8199, -6.3178, -88, -13.5495, -9.31809, -34.0259, -8.16241, -1.91371, -44.2451, -7.52853, 82.9208, 20.439, -41.3463, 39.7125, 8.95767, -23.8218, 77.8386, 17.8239, -25, 95.3192, 13.7734, 122, 11.3692, -38.6497, 26.5283, -14.4585, -21.4342, 130, -6.01029, 3.6998, 16.9685, 19.471, 94.4829, 33.2126, 0, 49.075, -14.0322, -20.998, 2.6597, -0.0292597, 115.085, -44.4252, -0.979019, 107.941, Policy 4 will be: 44.808, -34.299, 3.0537, -8.7502, -53.8386, 76.0878, -8.34397, 69.3101, -29.5838, -10, 61.2042, 23.7512, -2.7445, -55.1542, 1.36065, -3.95834, -16.8078, 7.88675, 34.5506, -50.4787, 34.3295, 116.623, 10.1468, -3.6718, -88, -10.6949, -17.2757, 0.17041, -7.56268, -0.415849, -36.7161, -9.17466, 86.8124, 20.6865, -39.1782, 65.4419, 9.99357, -15.5961, 54.3252, 5.62704, -15.3097, 74.2902, 11.9268, 117.188, 14.5629, -41.4878, 16.0825, -11.0127, -16.8203, 118.652, 0.275803, 21.8545, 31.3045, 15.5112, 106.273, 37.4672, 0, 46.8965, -9.12836, -17.7217, 10.1551, 8.66753, 122, -67.3097, -1.77996, 113.426, trial: 0, score: 6 trial: 1, score: 8 Policy 0: 42.9101, -27.0284, 4.14017, -4.70668, -55, 76.982, -4.79606, 64.8182, -46.3681, -10, 65.4155, 23.1366, -5.41879, -69.377, 6.66947, -5.80628, -24.2091, 8.78205, 36.7724, -74.13, 25.7255, 122, 19.8764, -4.3428, -88, -9.37422, -12.4835, -10.4283, -1.13077, 4.59844, -44.4941, -1.35682, 91.0595, 26.0112, -46.0197, 57.2966, 4.76001, -20.3004, 78.4637, 10.3961, -25, 95.3476, 21.0577, 122, 16.7407, -38.1342, 27.4011, -15, -21.3494, 128.533, -2.24312, 11.4384, 19.3939, 24.9263, 107.177, 35.116, 0, 40.0046, -15, -15.2574, 7.32282, 5.47558, 122, -43.4153, 4.43759, 122, Average Score: 7 trial: 0, score: 8 trial: 1, score: 8 Policy 1: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Average Score: 8 trial: 0, score: 5 trial: 1, score: 5 Policy 2: 49.1481, -21.9165, -5.10776, -1.94991, -49.6992, 54.0321, -10, 53.5127, -45.2369, -10, 72.0688, 16.9029, -0.0377679, -55.6233, 7.58898, -4.75297, -46.2234, 4.58343, 39.4347, -66.6664, 18.2987, 115.492, 23.1184, -10.2619, -88, -6.39134, -15.5214, 7.05868, -5.59934, 17.4519, -51.4668, 4.82769, 78.8338, 34.9483, -40.1774, 66.5008, 1.84888, -24.8635, 58.6057, 14.8647, -25, 77.839, 21.952, 122, 11.517, -35.4814, 22.0545, -10.9121, -20.4412, 130, -10, 7.44536, 19.8789, 27.4588, 94.6676, 38.5271, -6.11495, 31.8267, -15, -16.36, -5.74426, 5.36673, 122, -55.6763, -2.59677, 120.054, Average Score: 5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 35.5816, -22.9209, -0.339179, -0.938595, -50.3652, 68.0326, 1.02429, 63.0573, -57.408, -5.04264, 69.7384, 30.6368, -2.24848, -84.8367, 6.08406, -7.27734, -38.9571, -0.28327, 44.6195, -91.7926, 29.4257, 122, 24.8199, -6.3178, -88, -13.5495, -9.31809, -34.0259, -8.16241, -1.91371, -44.2451, -7.52853, 82.9208, 20.439, -41.3463, 39.7125, 8.95767, -23.8218, 77.8386, 17.8239, -25, 95.3192, 13.7734, 122, 11.3692, -38.6497, 26.5283, -14.4585, -21.4342, 130, -6.01029, 3.6998, 16.9685, 19.471, 94.4829, 33.2126, 0, 49.075, -14.0322, -20.998, 2.6597, -0.0292597, 115.085, -44.4252, -0.979019, 107.941, Average Score: 0 trial: 0, score: 5 trial: 1, score: 3 Policy 4: 44.808, -34.299, 3.0537, -8.7502, -53.8386, 76.0878, -8.34397, 69.3101, -29.5838, -10, 61.2042, 23.7512, -2.7445, -55.1542, 1.36065, -3.95834, -16.8078, 7.88675, 34.5506, -50.4787, 34.3295, 116.623, 10.1468, -3.6718, -88, -10.6949, -17.2757, 0.17041, -7.56268, -0.415849, -36.7161, -9.17466, 86.8124, 20.6865, -39.1782, 65.4419, 9.99357, -15.5961, 54.3252, 5.62704, -15.3097, 74.2902, 11.9268, 117.188, 14.5629, -41.4878, 16.0825, -11.0127, -16.8203, 118.652, 0.275803, 21.8545, 31.3045, 15.5112, 106.273, 37.4672, 0, 46.8965, -9.12836, -17.7217, 10.1551, 8.66753, 122, -67.3097, -1.77996, 113.426, Average Score: 4 --------------------------------- New Iteration Current Best Policy: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Current Best Policy Score: 8 Policy 0 will be: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Policy 1 will be: 46.6342, -26.8138, 12.8698, -2.71195, -47.9253, 73.9581, -2.36606, 73.573, -59.0143, 3.66245, 57.6366, 22.7412, -5.17499, -82.0606, 11.1732, -5.19814, -4.78435, 8.35026, 29.5447, -95.719, 18.4027, 122, 25.1362, -8.85321, -86.6182, -3.93118, -11.7699, -6.89977, -0.538121, 2.38705, -48.8015, -1.67281, 105.8, 17.3038, -43.258, 38.155, 10.4944, -27.7973, 85.3638, 5.21801, -15.0178, 115, 22.7123, 122, 18.0697, -40.5858, 26.6669, -12.5161, -18.5402, 130, -4.95632, 28.3186, -11.1915, 27.6219, 115.552, 29.2895, 0, 29.0289, -15, -17.7153, 10.7018, 1.67373, 121.88, -46.4958, 1.61218, 110.631, Policy 2 will be: 56.2108, -27.0849, -4.68732, -5.89919, -46.8902, 103.247, -9.12867, 66.3495, -41.5315, 2.24907, 63.9311, 29.816, -6.91622, -88, 11.6558, -3, -23.4353, -2.41091, 48.3128, -55.1984, 32.2809, 104.678, 8.24155, -8.33914, -88, -11.5667, -15.4847, 6.58171, 12.2795, -4.1032, -38.279, -5.65072, 101.733, 25.6063, -45.4345, 33.7202, 4.56838, -22.8268, 106.894, 2.85805, -18.5478, 111.62, 18.1671, 116.943, 31.0656, -39.9409, 21.1438, -15, -23.0656, 128.659, -9.21626, 29.3007, 14.1898, 24.6235, 97.3544, 41.8206, -7.36936, 39.73, -11.2589, -15.2256, 11.4858, -9.80224, 113.889, -39.5855, -2.04633, 113.957, Policy 3 will be: 41.6742, -32.0436, 14.351, -3.92328, -53.5858, 71.2685, -4.71651, 65.4534, -54.8298, -3.63639, 72.6289, 33.986, -0.841261, -88, 7.40802, -3.563, -10.8127, 5.44821, 30.7913, -70.7747, 20.592, 122, 21.6242, -3.34596, -88, -14.7117, -14.2912, -25.8357, -6.53471, -4.64806, -25.2698, -0.970489, 90.9454, 11.7923, -38.2369, 42.284, 7.69958, -28.7396, 107.631, 7.67964, -13.3132, 115, 23.8118, 101.709, 15.0078, -39.2728, 12.6921, -15, -21.1214, 130, -9.89496, 34.3463, 14.8343, 30.1611, 108.87, 33.9043, -10.7337, 42.1443, -10.1931, -17.7207, -3.42319, -8.93338, 119.252, -46.5298, 8.57106, 119.98, Policy 4 will be: 42.833, -36.7029, -3.30682, -0.370793, -49.0552, 67.7193, -2.20695, 65.6796, -89.0289, -2.68288, 61.4101, 25.3759, -10.4451, -74.1964, 3.80504, -3.04878, -13.7607, 7.18904, 26.5465, -65.3144, 19.09, 121.56, 24.5877, 0, -88, -14.5162, -12.4919, -25.9128, 0.0142602, 14.4598, -33.1491, 5.86774, 99.0461, 15.8598, -48.2536, 28.9028, 4.56681, -22.9149, 87.6685, 11.5333, -22.3132, 92.1269, 19.8331, 115.247, 31.4045, -36.2786, 21.1309, -15, -25.5247, 130, -0.973179, 24.6834, -3.7978, 19.4726, 100.044, 32.368, -13.2354, 28.8686, -7.34186, -15.1287, -5.91514, 1.92331, 122, -46.8522, -0.0751843, 114.52, trial: 0, score: 5 trial: 1, score: 6 Policy 0: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 46.6342, -26.8138, 12.8698, -2.71195, -47.9253, 73.9581, -2.36606, 73.573, -59.0143, 3.66245, 57.6366, 22.7412, -5.17499, -82.0606, 11.1732, -5.19814, -4.78435, 8.35026, 29.5447, -95.719, 18.4027, 122, 25.1362, -8.85321, -86.6182, -3.93118, -11.7699, -6.89977, -0.538121, 2.38705, -48.8015, -1.67281, 105.8, 17.3038, -43.258, 38.155, 10.4944, -27.7973, 85.3638, 5.21801, -15.0178, 115, 22.7123, 122, 18.0697, -40.5858, 26.6669, -12.5161, -18.5402, 130, -4.95632, 28.3186, -11.1915, 27.6219, 115.552, 29.2895, 0, 29.0289, -15, -17.7153, 10.7018, 1.67373, 121.88, -46.4958, 1.61218, 110.631, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 56.2108, -27.0849, -4.68732, -5.89919, -46.8902, 103.247, -9.12867, 66.3495, -41.5315, 2.24907, 63.9311, 29.816, -6.91622, -88, 11.6558, -3, -23.4353, -2.41091, 48.3128, -55.1984, 32.2809, 104.678, 8.24155, -8.33914, -88, -11.5667, -15.4847, 6.58171, 12.2795, -4.1032, -38.279, -5.65072, 101.733, 25.6063, -45.4345, 33.7202, 4.56838, -22.8268, 106.894, 2.85805, -18.5478, 111.62, 18.1671, 116.943, 31.0656, -39.9409, 21.1438, -15, -23.0656, 128.659, -9.21626, 29.3007, 14.1898, 24.6235, 97.3544, 41.8206, -7.36936, 39.73, -11.2589, -15.2256, 11.4858, -9.80224, 113.889, -39.5855, -2.04633, 113.957, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 41.6742, -32.0436, 14.351, -3.92328, -53.5858, 71.2685, -4.71651, 65.4534, -54.8298, -3.63639, 72.6289, 33.986, -0.841261, -88, 7.40802, -3.563, -10.8127, 5.44821, 30.7913, -70.7747, 20.592, 122, 21.6242, -3.34596, -88, -14.7117, -14.2912, -25.8357, -6.53471, -4.64806, -25.2698, -0.970489, 90.9454, 11.7923, -38.2369, 42.284, 7.69958, -28.7396, 107.631, 7.67964, -13.3132, 115, 23.8118, 101.709, 15.0078, -39.2728, 12.6921, -15, -21.1214, 130, -9.89496, 34.3463, 14.8343, 30.1611, 108.87, 33.9043, -10.7337, 42.1443, -10.1931, -17.7207, -3.42319, -8.93338, 119.252, -46.5298, 8.57106, 119.98, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 42.833, -36.7029, -3.30682, -0.370793, -49.0552, 67.7193, -2.20695, 65.6796, -89.0289, -2.68288, 61.4101, 25.3759, -10.4451, -74.1964, 3.80504, -3.04878, -13.7607, 7.18904, 26.5465, -65.3144, 19.09, 121.56, 24.5877, 0, -88, -14.5162, -12.4919, -25.9128, 0.0142602, 14.4598, -33.1491, 5.86774, 99.0461, 15.8598, -48.2536, 28.9028, 4.56681, -22.9149, 87.6685, 11.5333, -22.3132, 92.1269, 19.8331, 115.247, 31.4045, -36.2786, 21.1309, -15, -25.5247, 130, -0.973179, 24.6834, -3.7978, 19.4726, 100.044, 32.368, -13.2354, 28.8686, -7.34186, -15.1287, -5.91514, 1.92331, 122, -46.8522, -0.0751843, 114.52, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Current Best Policy Score: 5.5 Policy 0 will be: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Policy 1 will be: 41.2012, -32.4229, -1.44055, -0.297612, -55, 106.106, -0.619707, 69.5868, -55.2296, -10, 63.1142, 30.6687, -14.2659, -75.2924, 5.16835, -3, -26.7735, -3.20075, 54.5014, -50.2205, 15.6564, 122, 23.5256, -5.25856, -88, -9.71131, -16.39, -26.5273, 4.81028, 5.14963, -62.5185, 7.88803, 89.3852, 19.8628, -37.8247, 23.7631, 10.8296, -24.0205, 74.1861, 21.1563, -25, 115, 23.0228, 119.052, 21.4289, -37.818, 29.9448, -13.2475, -18.5505, 130, -0.524415, 36.6072, -21.1393, 19.4612, 115.652, 31.168, -4.40267, 52.7573, -12.8371, -17.3421, 13.2146, 3.31389, 112.217, -82.994, 5.95575, 122, Policy 2 will be: 48.5673, -28.4223, 18.8777, -2.19662, -55, 64.7936, -10, 88.2544, -54.5231, -6.34668, 66.7547, 27.606, -7.41637, -64.1665, 7.81289, -6.39962, -34.7274, -6.10629, 50.9944, -64.1633, 19.1505, 111.762, 22.3533, -7.55098, -85.7595, -10.2718, -11.0614, -24.5878, 6.84599, -4.26758, -25.1359, 12.6152, 100.467, 20.1837, -47.0666, 48.7835, 3.24056, -27.4956, 85.0377, 18.4914, -24.9589, 110.218, 29.1738, 104.649, 25.0177, -42.5992, 10.1825, -9.82123, -22.0814, 101.09, -1.81133, 19.2668, 16.3395, 27.4125, 102.897, 31.3368, -0.983771, 49.0576, -9.48155, -20.5744, -16.8582, -9.49605, 122, -51.811, 9.53897, 115.747, Policy 3 will be: 48.2705, -28.2428, 8.73494, -1.77604, -51.6465, 98.0171, -10, 67.4256, -85.535, -10, 67.465, 25.9251, -2.6054, -72.1582, 0.897715, -6.99337, -29.0059, -5.35085, 49.4525, -80.7592, 14.5008, 122, 18.2802, -0.336768, -86.0679, -9.93807, -17.7899, 9.54845, 10.206, -4.40292, -30.521, -4.98429, 81.5292, 22.0639, -40.8172, 52.3406, 6.15679, -29.0071, 92.1096, 15.4053, -25, 102.478, 26.0962, 122, 30.9109, -36.5077, 16.3676, -15, -24.6488, 110.555, -10, 33.0615, 11.8961, 14.685, 106.916, 43.8682, -11.3091, 26.7101, -15, -22.9445, -1.79023, -8.10115, 122, -83.6683, 7.78937, 122, Policy 4 will be: 50.1459, -32.9059, 0.802088, 1.075, -48.9461, 73.0323, -3.61732, 65.4683, -71.6009, -10, 74.2108, 29.5806, -8.12828, -88, 7.09228, -3, -26.2964, 8.75081, 42.0076, -54.7792, 24.6695, 122, 17.6493, -1.0571, -75.9055, -3.84579, -20.1326, -13.1347, -4.70288, -6.08124, -40.2395, 4.06143, 95.597, 14.7191, -49.7793, 32.057, 7.88004, -24.4773, 89.3329, 9.26979, -25, 92.1222, 21.457, 114.299, 19.4766, -47.5107, 8.44817, -15, -21.737, 130, -0.335615, 32.2455, -20.891, 13.6193, 93.3298, 38.9026, -0.888398, 57.7091, -10.2401, -22.5537, -4.01925, -10, 108.248, -77.943, 4.33654, 122, trial: 0, score: 0 trial: 1, score: 8 Policy 0: 46.3546, -31.0116, 12.0114, -1.19589, -51.4894, 82.9449, -10, 74.593, -65.1588, -3.66831, 60.0986, 26.8449, -6.93137, -81.706, 6.05893, -3, -26.6368, 0.590586, 41.2206, -73.9244, 23.9317, 114.388, 16.7516, -3.24001, -88, -8.72233, -15.8962, -2.73666, 3.15435, -0.223314, -38.987, 3.49809, 92.5572, 18.5112, -43.2336, 41.266, 7.68928, -24.2677, 93.7912, 11.9837, -25, 103.961, 26.4269, 115.124, 24.5987, -41.1677, 21.7823, -14.1533, -21.9113, 118.634, -8.60532, 25.4703, -3.75414, 22.6907, 106.534, 38.5265, -5.89223, 42.6881, -13.1779, -20.6703, -8.6825, -1.70731, 122, -62.3198, 6.62383, 122, Average Score: 4 trial: 0, score: 0 trial: 1, score: 5 Policy 1: 41.2012, -32.4229, -1.44055, -0.297612, -55, 106.106, -0.619707, 69.5868, -55.2296, -10, 63.1142, 30.6687, -14.2659, -75.2924, 5.16835, -3, -26.7735, -3.20075, 54.5014, -50.2205, 15.6564, 122, 23.5256, -5.25856, -88, -9.71131, -16.39, -26.5273, 4.81028, 5.14963, -62.5185, 7.88803, 89.3852, 19.8628, -37.8247, 23.7631, 10.8296, -24.0205, 74.1861, 21.1563, -25, 115, 23.0228, 119.052, 21.4289, -37.818, 29.9448, -13.2475, -18.5505, 130, -0.524415, 36.6072, -21.1393, 19.4612, 115.652, 31.168, -4.40267, 52.7573, -12.8371, -17.3421, 13.2146, 3.31389, 112.217, -82.994, 5.95575, 122, Average Score: 2.5 trial: 0, score: 6 trial: 1, score: 7 Policy 2: 48.5673, -28.4223, 18.8777, -2.19662, -55, 64.7936, -10, 88.2544, -54.5231, -6.34668, 66.7547, 27.606, -7.41637, -64.1665, 7.81289, -6.39962, -34.7274, -6.10629, 50.9944, -64.1633, 19.1505, 111.762, 22.3533, -7.55098, -85.7595, -10.2718, -11.0614, -24.5878, 6.84599, -4.26758, -25.1359, 12.6152, 100.467, 20.1837, -47.0666, 48.7835, 3.24056, -27.4956, 85.0377, 18.4914, -24.9589, 110.218, 29.1738, 104.649, 25.0177, -42.5992, 10.1825, -9.82123, -22.0814, 101.09, -1.81133, 19.2668, 16.3395, 27.4125, 102.897, 31.3368, -0.983771, 49.0576, -9.48155, -20.5744, -16.8582, -9.49605, 122, -51.811, 9.53897, 115.747, Average Score: 6.5 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 48.2705, -28.2428, 8.73494, -1.77604, -51.6465, 98.0171, -10, 67.4256, -85.535, -10, 67.465, 25.9251, -2.6054, -72.1582, 0.897715, -6.99337, -29.0059, -5.35085, 49.4525, -80.7592, 14.5008, 122, 18.2802, -0.336768, -86.0679, -9.93807, -17.7899, 9.54845, 10.206, -4.40292, -30.521, -4.98429, 81.5292, 22.0639, -40.8172, 52.3406, 6.15679, -29.0071, 92.1096, 15.4053, -25, 102.478, 26.0962, 122, 30.9109, -36.5077, 16.3676, -15, -24.6488, 110.555, -10, 33.0615, 11.8961, 14.685, 106.916, 43.8682, -11.3091, 26.7101, -15, -22.9445, -1.79023, -8.10115, 122, -83.6683, 7.78937, 122, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 50.1459, -32.9059, 0.802088, 1.075, -48.9461, 73.0323, -3.61732, 65.4683, -71.6009, -10, 74.2108, 29.5806, -8.12828, -88, 7.09228, -3, -26.2964, 8.75081, 42.0076, -54.7792, 24.6695, 122, 17.6493, -1.0571, -75.9055, -3.84579, -20.1326, -13.1347, -4.70288, -6.08124, -40.2395, 4.06143, 95.597, 14.7191, -49.7793, 32.057, 7.88004, -24.4773, 89.3329, 9.26979, -25, 92.1222, 21.457, 114.299, 19.4766, -47.5107, 8.44817, -15, -21.737, 130, -0.335615, 32.2455, -20.891, 13.6193, 93.3298, 38.9026, -0.888398, 57.7091, -10.2401, -22.5537, -4.01925, -10, 108.248, -77.943, 4.33654, 122, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 48.5673, -28.4223, 18.8777, -2.19662, -55, 64.7936, -10, 88.2544, -54.5231, -6.34668, 66.7547, 27.606, -7.41637, -64.1665, 7.81289, -6.39962, -34.7274, -6.10629, 50.9944, -64.1633, 19.1505, 111.762, 22.3533, -7.55098, -85.7595, -10.2718, -11.0614, -24.5878, 6.84599, -4.26758, -25.1359, 12.6152, 100.467, 20.1837, -47.0666, 48.7835, 3.24056, -27.4956, 85.0377, 18.4914, -24.9589, 110.218, 29.1738, 104.649, 25.0177, -42.5992, 10.1825, -9.82123, -22.0814, 101.09, -1.81133, 19.2668, 16.3395, 27.4125, 102.897, 31.3368, -0.983771, 49.0576, -9.48155, -20.5744, -16.8582, -9.49605, 122, -51.811, 9.53897, 115.747, Current Best Policy Score: 6.5 Policy 0 will be: 48.5673, -28.4223, 18.8777, -2.19662, -55, 64.7936, -10, 88.2544, -54.5231, -6.34668, 66.7547, 27.606, -7.41637, -64.1665, 7.81289, -6.39962, -34.7274, -6.10629, 50.9944, -64.1633, 19.1505, 111.762, 22.3533, -7.55098, -85.7595, -10.2718, -11.0614, -24.5878, 6.84599, -4.26758, -25.1359, 12.6152, 100.467, 20.1837, -47.0666, 48.7835, 3.24056, -27.4956, 85.0377, 18.4914, -24.9589, 110.218, 29.1738, 104.649, 25.0177, -42.5992, 10.1825, -9.82123, -22.0814, 101.09, -1.81133, 19.2668, 16.3395, 27.4125, 102.897, 31.3368, -0.983771, 49.0576, -9.48155, -20.5744, -16.8582, -9.49605, 122, -51.811, 9.53897, 115.747, Policy 1 will be: 42.8332, -34.0046, 19.1656, 3.07416, -54.7164, 49.7161, -2.65683, 97.6609, -41.3816, -10, 56.3713, 27.4901, -0.538545, -51.384, 6.79703, -5.15138, -35.5483, 1.33157, 61.4838, -66.7832, 25.0275, 118.382, 20.6696, -0.0786016, -88, -13.7787, -16.0514, -17.7062, 8.8038, 5.60514, -8.58676, 17.9563, 100.92, 29.7306, -48.1828, 53.1335, 4.61154, -29.985, 71.2327, 10.0182, -25, 98.9707, 20.8026, 92.871, 28.6198, -47.1431, 22.7543, -11.1986, -23.0531, 114.106, 5.18873, 22.0476, -1.13048, 19.412, 115.975, 35.1278, 0, 63.574, -11.7893, -24.5132, -23.9251, -10, 122, -54.9176, 14.6099, 122, Policy 2 will be: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Policy 3 will be: 50.2548, -28.8671, 16.7998, -2.29555, -55, 88.6159, -3.48712, 86.8612, -68.4984, -1.40101, 67.9847, 24.0724, -6.22195, -81.5245, 11.4487, -3.40079, -30.8338, -2.57618, 64.9864, -67.5553, 14.2498, 98.9503, 18.0386, -13.5281, -73.1745, -12.382, -15.8857, -43.2265, -0.907038, -5.67994, -7.19938, 11.9399, 105.494, 20.7274, -39.6405, 35.9648, -1.71917, -26.2515, 99.1572, 27.8285, -24.0498, 92.6152, 27.0442, 98.0257, 19.8064, -43.5822, 14.9536, -14.9137, -16.5538, 117.144, -0.233793, 21.9748, 23.5865, 31.985, 93.4279, 33.3332, -7.17133, 42.7171, -11.1116, -17.7359, -10.2905, -9.65186, 122, -72.7416, 0.693405, 102.435, Policy 4 will be: 38.8935, -21.3932, 25.995, -5.59233, -55, 81.8418, -10, 94.6878, -33.1364, 1.0484, 72.3711, 30.4727, -5.38022, -74.4383, 2.79301, -9.12046, -12.7478, -10, 49.5003, -74.8231, 28.024, 121.02, 29.9414, -7.6272, -72.0514, -8.46879, -16.1424, -24.8067, 1.70512, -9.7106, -38.199, 15.0586, 87.396, 14.6565, -42.0455, 48.7974, 5.2878, -30.0758, 93.9433, 19.7766, -12.5364, 92.527, 35.3396, 103.945, 32.859, -48.8919, 23.4725, -4.85182, -22.7216, 103.973, -10, 19.4148, 31.3831, 25.1532, 92.9659, 35.3668, 0, 59.3171, -5.63901, -23.9297, -34.0924, -4.9875, 109.502, -70.5737, -0.120298, 122, trial: 0, score: 0 trial: 1, score: 4 Policy 0: 48.5673, -28.4223, 18.8777, -2.19662, -55, 64.7936, -10, 88.2544, -54.5231, -6.34668, 66.7547, 27.606, -7.41637, -64.1665, 7.81289, -6.39962, -34.7274, -6.10629, 50.9944, -64.1633, 19.1505, 111.762, 22.3533, -7.55098, -85.7595, -10.2718, -11.0614, -24.5878, 6.84599, -4.26758, -25.1359, 12.6152, 100.467, 20.1837, -47.0666, 48.7835, 3.24056, -27.4956, 85.0377, 18.4914, -24.9589, 110.218, 29.1738, 104.649, 25.0177, -42.5992, 10.1825, -9.82123, -22.0814, 101.09, -1.81133, 19.2668, 16.3395, 27.4125, 102.897, 31.3368, -0.983771, 49.0576, -9.48155, -20.5744, -16.8582, -9.49605, 122, -51.811, 9.53897, 115.747, Average Score: 2 trial: 0, score: 2 trial: 1, score: 4 Policy 1: 42.8332, -34.0046, 19.1656, 3.07416, -54.7164, 49.7161, -2.65683, 97.6609, -41.3816, -10, 56.3713, 27.4901, -0.538545, -51.384, 6.79703, -5.15138, -35.5483, 1.33157, 61.4838, -66.7832, 25.0275, 118.382, 20.6696, -0.0786016, -88, -13.7787, -16.0514, -17.7062, 8.8038, 5.60514, -8.58676, 17.9563, 100.92, 29.7306, -48.1828, 53.1335, 4.61154, -29.985, 71.2327, 10.0182, -25, 98.9707, 20.8026, 92.871, 28.6198, -47.1431, 22.7543, -11.1986, -23.0531, 114.106, 5.18873, 22.0476, -1.13048, 19.412, 115.975, 35.1278, 0, 63.574, -11.7893, -24.5132, -23.9251, -10, 122, -54.9176, 14.6099, 122, Average Score: 3 trial: 0, score: 8 trial: 1, score: 9 Policy 2: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Average Score: 8.5 trial: 0, score: 0 trial: 1, score: 7 Policy 3: 50.2548, -28.8671, 16.7998, -2.29555, -55, 88.6159, -3.48712, 86.8612, -68.4984, -1.40101, 67.9847, 24.0724, -6.22195, -81.5245, 11.4487, -3.40079, -30.8338, -2.57618, 64.9864, -67.5553, 14.2498, 98.9503, 18.0386, -13.5281, -73.1745, -12.382, -15.8857, -43.2265, -0.907038, -5.67994, -7.19938, 11.9399, 105.494, 20.7274, -39.6405, 35.9648, -1.71917, -26.2515, 99.1572, 27.8285, -24.0498, 92.6152, 27.0442, 98.0257, 19.8064, -43.5822, 14.9536, -14.9137, -16.5538, 117.144, -0.233793, 21.9748, 23.5865, 31.985, 93.4279, 33.3332, -7.17133, 42.7171, -11.1116, -17.7359, -10.2905, -9.65186, 122, -72.7416, 0.693405, 102.435, Average Score: 3.5 trial: 0, score: 0 trial: 1, score: 3 Policy 4: 38.8935, -21.3932, 25.995, -5.59233, -55, 81.8418, -10, 94.6878, -33.1364, 1.0484, 72.3711, 30.4727, -5.38022, -74.4383, 2.79301, -9.12046, -12.7478, -10, 49.5003, -74.8231, 28.024, 121.02, 29.9414, -7.6272, -72.0514, -8.46879, -16.1424, -24.8067, 1.70512, -9.7106, -38.199, 15.0586, 87.396, 14.6565, -42.0455, 48.7974, 5.2878, -30.0758, 93.9433, 19.7766, -12.5364, 92.527, 35.3396, 103.945, 32.859, -48.8919, 23.4725, -4.85182, -22.7216, 103.973, -10, 19.4148, 31.3831, 25.1532, 92.9659, 35.3668, 0, 59.3171, -5.63901, -23.9297, -34.0924, -4.9875, 109.502, -70.5737, -0.120298, 122, Average Score: 1.5 --------------------------------- New Iteration Current Best Policy: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Current Best Policy Score: 8.5 Policy 0 will be: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Policy 1 will be: 51.2983, -40.4899, 28.8101, 1.07271, -52.5458, 42.5926, -10, 76.428, -54.4273, -10, 73.9829, 18.7153, -7.80262, -83.6438, 7.33175, -9.39429, -22.4317, 3.11945, 60.0553, -85.0831, 8.14574, 120.905, 4.48843, -9.01207, -71.224, -11.3447, -7.62082, -21.7255, 3.46835, 7.27065, 11.1391, 21.3607, 107.995, 6.92193, -49.1557, 42.166, 0.203243, -29.0108, 104.177, 15.6591, -25, 111.377, 37.6625, 109.45, 28.44, -47.5143, 21.3799, -8.87018, -15.7686, 116.935, -1.68336, 21.6682, 2.53746, 35.8733, 105.532, 32.3997, 0, 78.5439, -9.4095, -17.0637, -55.5966, -10, 117.86, -39.3846, 2.76875, 109.813, Policy 2 will be: 56.7597, -40.4473, 13.6779, 0.603542, -55, 63.2899, -2.83219, 87.054, -66.9669, -10, 73.5949, 20.8331, -5.99987, -84.0108, 12.1594, -8.96049, -30.5623, -2.58613, 66.3594, -43.8811, 11.978, 122, 12.6403, 0, -86.8455, -13.4784, -8.886, -48.8553, 15.5791, 4.79418, -17.4252, 14.3154, 108.656, 8.40519, -51.8938, 57.3033, -5.78116, -30.0531, 96.077, 23.353, -25, 96.0998, 43.5261, 112.697, 38.2579, -53.613, 26.5414, -13.3996, -14.7793, 118.363, 1.20578, 7.76596, 47.6028, 35.5994, 103.728, 16.0907, 0, 75.5262, -9.05916, -17.1476, -35.597, -10, 122, -24.6506, 14.8265, 110.88, Policy 3 will be: 63.9614, -40.7255, 25.2094, 0.951387, -55, 43.3905, -6.58171, 83.932, -46.2303, -10, 72.966, 11.329, 0, -88, 6.80743, -8.08828, -34.2584, 2.71745, 66.5174, -57.4189, 22.6787, 122, 14.4185, -4.351, -88, -4.84846, -8.97096, -17.7234, 15.2522, -2.15813, -5.71353, 17.2549, 93.7145, 12.6461, -51.9435, 52.9254, -0.534087, -37.6316, 82.2186, 16.7788, -17.4443, 94.5764, 39.5804, 104.012, 40.7708, -47.8336, -0.286015, -9.54823, -21.2234, 99.2518, 5.58695, 24.2002, 46.9452, 34.6618, 114.512, 16.4902, -6.54081, 46.5676, -7.32276, -23.5496, -58.1902, -10, 122, -55.5564, 8.99033, 120.863, Policy 4 will be: 54.667, -30.6621, 1.57745, 2.62744, -51.6475, 42.9957, -10, 90.8703, -67.7525, -9.15918, 68.0057, 10.1918, -1.19109, -75.6289, 13.4526, -8.17309, 11.1352, -0.872227, 49.8384, -73.5543, 9.67778, 122, 17.8195, -4.16126, -74.995, -4.18755, -10.4452, -16.2121, 10.7903, -0.407554, -8.55232, 9.95523, 110.08, 16.383, -51.0598, 48.8657, -5.72586, -35.1357, 115.381, 31.4841, -25, 111.61, 33.2626, 122, 40.2271, -40.5362, 27.3997, -10.0429, -20.1279, 104.642, 0.219871, 8.1105, 4.1566, 24.9904, 118.898, 29.3871, -0.650427, 76.3455, -9.09655, -26.6742, -40.7902, -10, 116.441, -16.1964, 1.82514, 112.441, trial: 0, score: 6 trial: 1, score: 5 Policy 0: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Average Score: 5.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 51.2983, -40.4899, 28.8101, 1.07271, -52.5458, 42.5926, -10, 76.428, -54.4273, -10, 73.9829, 18.7153, -7.80262, -83.6438, 7.33175, -9.39429, -22.4317, 3.11945, 60.0553, -85.0831, 8.14574, 120.905, 4.48843, -9.01207, -71.224, -11.3447, -7.62082, -21.7255, 3.46835, 7.27065, 11.1391, 21.3607, 107.995, 6.92193, -49.1557, 42.166, 0.203243, -29.0108, 104.177, 15.6591, -25, 111.377, 37.6625, 109.45, 28.44, -47.5143, 21.3799, -8.87018, -15.7686, 116.935, -1.68336, 21.6682, 2.53746, 35.8733, 105.532, 32.3997, 0, 78.5439, -9.4095, -17.0637, -55.5966, -10, 117.86, -39.3846, 2.76875, 109.813, Average Score: 0 trial: 0, score: 5 trial: 1, score: 4 Policy 2: 56.7597, -40.4473, 13.6779, 0.603542, -55, 63.2899, -2.83219, 87.054, -66.9669, -10, 73.5949, 20.8331, -5.99987, -84.0108, 12.1594, -8.96049, -30.5623, -2.58613, 66.3594, -43.8811, 11.978, 122, 12.6403, 0, -86.8455, -13.4784, -8.886, -48.8553, 15.5791, 4.79418, -17.4252, 14.3154, 108.656, 8.40519, -51.8938, 57.3033, -5.78116, -30.0531, 96.077, 23.353, -25, 96.0998, 43.5261, 112.697, 38.2579, -53.613, 26.5414, -13.3996, -14.7793, 118.363, 1.20578, 7.76596, 47.6028, 35.5994, 103.728, 16.0907, 0, 75.5262, -9.05916, -17.1476, -35.597, -10, 122, -24.6506, 14.8265, 110.88, Average Score: 4.5 trial: 0, score: 5 trial: 1, score: 4 Policy 3: 63.9614, -40.7255, 25.2094, 0.951387, -55, 43.3905, -6.58171, 83.932, -46.2303, -10, 72.966, 11.329, 0, -88, 6.80743, -8.08828, -34.2584, 2.71745, 66.5174, -57.4189, 22.6787, 122, 14.4185, -4.351, -88, -4.84846, -8.97096, -17.7234, 15.2522, -2.15813, -5.71353, 17.2549, 93.7145, 12.6461, -51.9435, 52.9254, -0.534087, -37.6316, 82.2186, 16.7788, -17.4443, 94.5764, 39.5804, 104.012, 40.7708, -47.8336, -0.286015, -9.54823, -21.2234, 99.2518, 5.58695, 24.2002, 46.9452, 34.6618, 114.512, 16.4902, -6.54081, 46.5676, -7.32276, -23.5496, -58.1902, -10, 122, -55.5564, 8.99033, 120.863, Average Score: 4.5 trial: 0, score: 1 trial: 1, score: 2 Policy 4: 54.667, -30.6621, 1.57745, 2.62744, -51.6475, 42.9957, -10, 90.8703, -67.7525, -9.15918, 68.0057, 10.1918, -1.19109, -75.6289, 13.4526, -8.17309, 11.1352, -0.872227, 49.8384, -73.5543, 9.67778, 122, 17.8195, -4.16126, -74.995, -4.18755, -10.4452, -16.2121, 10.7903, -0.407554, -8.55232, 9.95523, 110.08, 16.383, -51.0598, 48.8657, -5.72586, -35.1357, 115.381, 31.4841, -25, 111.61, 33.2626, 122, 40.2271, -40.5362, 27.3997, -10.0429, -20.1279, 104.642, 0.219871, 8.1105, 4.1566, 24.9904, 118.898, 29.3871, -0.650427, 76.3455, -9.09655, -26.6742, -40.7902, -10, 116.441, -16.1964, 1.82514, 112.441, Average Score: 1.5 --------------------------------- New Iteration Current Best Policy: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Current Best Policy Score: 5.5 Policy 0 will be: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Policy 1 will be: 63.259, -33.4858, 22.3435, 8.33246, -55, 46.6699, -6.56667, 74.193, -86.4232, -4.06288, 58.0269, 20.5126, -13.7248, -62.5617, 5.66333, -8.31584, 0.412139, -10, 48.6668, -87.9864, 9.95457, 116.596, 19.3475, 0, -68.6686, -2.81429, -4.29986, -35.725, 15.5925, 11.5155, -24.9824, 19.8675, 101.849, 12.0703, -47.1759, 63.0428, 0.878054, -28.6406, 104.108, 28.2128, -25, 109.261, 36.8466, 109.278, 26.5855, -54.2972, 17.8881, -11.0136, -17.7826, 130, 8.46979, 5.38335, 49.697, 20.0064, 98.426, 33.4857, -3.3115, 72.2124, -9.61838, -17.9728, -54.7092, -10, 122, -14.5944, 0.0937754, 104.844, Policy 2 will be: 45.8015, -30.5481, 32.2415, 8.0858, -55, 37.2715, -4.51538, 72.6117, -79.1288, -7.89094, 69.8172, 22.8894, -14.2625, -84.7012, 9.24519, -3, -33.3467, 0.929864, 57.9008, -48.6997, 9.94012, 111.311, 21.6845, -1.52528, -74.7492, -9.75043, -9.32337, -40.5094, -0.566063, -10.8808, 2.22741, 23.5162, 104.741, 22.1385, -43.3078, 61.4756, 2.37341, -27.3762, 74.3214, 26.2346, -25, 115, 31.8852, 120.642, 38.7583, -40.9528, 12.2831, -7.02223, -19.0478, 122.437, 0.0694907, 19.0283, 12.8748, 29.9187, 108.702, 17.1038, 0, 77.5772, -7.15344, -25.9478, -52.7398, -10, 118.294, -38.5141, 11.6257, 112.111, Policy 3 will be: 60.0374, -30.0969, 17.8251, 2.62643, -55, 44.3258, -0.579347, 89.8697, -57.169, -5.49538, 80.7944, 21.3409, -14.1278, -74.1676, 11.3263, -8.64106, -6.84626, -7.75708, 63.1986, -54.0968, 10.2436, 115.839, 14.8639, -6.00042, -72.9527, -9.39795, -8.33739, -57.9725, 13.7148, -12.4532, 3.60133, 8.23604, 98.6396, 21.1139, -47.3503, 34.5029, 2.17638, -33.3042, 87.3116, 32.1889, -25, 115, 34.3261, 112.166, 36.0785, -47.2447, 14.8226, -12.8265, -23.0531, 130, 2.63149, 29.7204, 12.1436, 24.9873, 116.299, 26.8243, -6.42676, 60.5513, -5.18915, -22.9664, -46.0743, -1.42381, 117.554, -45.6248, -1.17568, 117.482, Policy 4 will be: 57.7337, -32.0407, 11.2524, 8.31978, -54.6016, 23.9615, -10, 79.6575, -93.7671, -10, 58.9676, 8.19621, -6.0628, -88, 13.2486, -3, -2.13644, 0.727481, 49.2707, -54.9449, 18.0791, 113.979, 14.6621, -3.75233, -83.298, -12.1975, -14.9099, -54.1073, 8.21741, 6.43837, -3.37014, 19.5202, 94.7441, 21.62, -54.2333, 50.4731, 4.88819, -34.5756, 80.3766, 27.7814, -12.8536, 102.663, 38.7946, 122, 32.6377, -49.4012, 15.3128, -10.9944, -16.1324, 117.133, 8.53314, 30.124, 9.99609, 17.4033, 99.4249, 23.1842, 0, 71.4322, -1.43413, -18.0753, -42.2833, -8.96596, 122, -56.8723, 0.316629, 108.172, trial: 0, score: 5 trial: 1, score: 4 Policy 0: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Average Score: 4.5 trial: 0, score: 0 trial: 1, score: 0 Policy 1: 63.259, -33.4858, 22.3435, 8.33246, -55, 46.6699, -6.56667, 74.193, -86.4232, -4.06288, 58.0269, 20.5126, -13.7248, -62.5617, 5.66333, -8.31584, 0.412139, -10, 48.6668, -87.9864, 9.95457, 116.596, 19.3475, 0, -68.6686, -2.81429, -4.29986, -35.725, 15.5925, 11.5155, -24.9824, 19.8675, 101.849, 12.0703, -47.1759, 63.0428, 0.878054, -28.6406, 104.108, 28.2128, -25, 109.261, 36.8466, 109.278, 26.5855, -54.2972, 17.8881, -11.0136, -17.7826, 130, 8.46979, 5.38335, 49.697, 20.0064, 98.426, 33.4857, -3.3115, 72.2124, -9.61838, -17.9728, -54.7092, -10, 122, -14.5944, 0.0937754, 104.844, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 2: 45.8015, -30.5481, 32.2415, 8.0858, -55, 37.2715, -4.51538, 72.6117, -79.1288, -7.89094, 69.8172, 22.8894, -14.2625, -84.7012, 9.24519, -3, -33.3467, 0.929864, 57.9008, -48.6997, 9.94012, 111.311, 21.6845, -1.52528, -74.7492, -9.75043, -9.32337, -40.5094, -0.566063, -10.8808, 2.22741, 23.5162, 104.741, 22.1385, -43.3078, 61.4756, 2.37341, -27.3762, 74.3214, 26.2346, -25, 115, 31.8852, 120.642, 38.7583, -40.9528, 12.2831, -7.02223, -19.0478, 122.437, 0.0694907, 19.0283, 12.8748, 29.9187, 108.702, 17.1038, 0, 77.5772, -7.15344, -25.9478, -52.7398, -10, 118.294, -38.5141, 11.6257, 112.111, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 3: 60.0374, -30.0969, 17.8251, 2.62643, -55, 44.3258, -0.579347, 89.8697, -57.169, -5.49538, 80.7944, 21.3409, -14.1278, -74.1676, 11.3263, -8.64106, -6.84626, -7.75708, 63.1986, -54.0968, 10.2436, 115.839, 14.8639, -6.00042, -72.9527, -9.39795, -8.33739, -57.9725, 13.7148, -12.4532, 3.60133, 8.23604, 98.6396, 21.1139, -47.3503, 34.5029, 2.17638, -33.3042, 87.3116, 32.1889, -25, 115, 34.3261, 112.166, 36.0785, -47.2447, 14.8226, -12.8265, -23.0531, 130, 2.63149, 29.7204, 12.1436, 24.9873, 116.299, 26.8243, -6.42676, 60.5513, -5.18915, -22.9664, -46.0743, -1.42381, 117.554, -45.6248, -1.17568, 117.482, Average Score: 0 trial: 0, score: 0 trial: 1, score: 0 Policy 4: 57.7337, -32.0407, 11.2524, 8.31978, -54.6016, 23.9615, -10, 79.6575, -93.7671, -10, 58.9676, 8.19621, -6.0628, -88, 13.2486, -3, -2.13644, 0.727481, 49.2707, -54.9449, 18.0791, 113.979, 14.6621, -3.75233, -83.298, -12.1975, -14.9099, -54.1073, 8.21741, 6.43837, -3.37014, 19.5202, 94.7441, 21.62, -54.2333, 50.4731, 4.88819, -34.5756, 80.3766, 27.7814, -12.8536, 102.663, 38.7946, 122, 32.6377, -49.4012, 15.3128, -10.9944, -16.1324, 117.133, 8.53314, 30.124, 9.99609, 17.4033, 99.4249, 23.1842, 0, 71.4322, -1.43413, -18.0753, -42.2833, -8.96596, 122, -56.8723, 0.316629, 108.172, Average Score: 0 --------------------------------- New Iteration Current Best Policy: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Current Best Policy Score: 4.5 Policy 0 will be: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Policy 1 will be: 63.0365, -40.9087, 27.4005, 1.3417, -55, 66.1623, -10, 85.6439, -55.6988, -5.09847, 68.9608, 25.2947, 0, -71.9405, 6.92988, -6.05872, 11.964, -1.52653, 68.3074, -49.6459, 22.0186, 115.789, 12.4185, -3.59655, -82.3289, -5.80649, -6.59398, -13.4154, 4.75614, -6.27537, 10.5947, 16.9695, 111.008, 6.75893, -53.6353, 55.8212, 4.71394, -33.4011, 93.3452, 25.6801, -17.2054, 115, 45.8926, 122, 27.9057, -48.8437, 17.5161, -5.76167, -23.3373, 114.67, 12.36, 5.51746, 5.37323, 31.1555, 111.228, 29.2687, -7.3959, 78.5039, -10.7657, -26.4292, -50.0945, -10, 120.223, -27.9042, 9.02482, 122, Policy 2 will be: 59.0461, -38.9451, 36.488, 4.62027, -50.197, 50.47, -10, 68.0952, -53.2444, -10, 66.7451, 25.0228, -7.84401, -84.2517, 9.88731, -3, 4.02794, 3.52325, 56.2857, -43.9309, 20.993, 120.228, 5.92348, 0, -88, -6.22214, -4.89108, -16.3086, -2.59906, -5.81783, -23.2145, 22.04, 104.754, 15.8943, -53.9646, 49.8944, 0.665373, -37.3676, 90.0775, 27.442, -25, 115, 40.5643, 122, 32.9298, -45.2703, 0.535087, -13.4534, -23.4337, 111.636, 3.62477, 6.09638, 38.6557, 23.6677, 109.011, 18.6676, 0, 72.8271, -9.72022, -20.064, -46.7053, -10, 112.535, -60.4629, 6.08881, 122, Policy 3 will be: 50.5994, -40.2511, 28.1754, 7.99501, -53.2595, 63.4857, -10, 80.871, -72.3169, -9.53954, 80.9124, 12.1258, -3.08743, -82.2659, 4.68475, -7.93379, 0.231804, -10, 67.1051, -90.6763, 21.6285, 122, 18.3233, -1.29867, -85.059, -5.37323, -5.87327, -60.0643, 5.22805, -14.0581, 1.35872, 14.2476, 100.571, 14.9943, -45.0993, 59.0259, -1.55835, -33.553, 68.5011, 23.4032, -25, 107.531, 28.1507, 112.981, 44.8931, -41.8953, 0.519493, -9.61719, -14.5656, 130, -3.54202, 25.0744, 41.4722, 34.0744, 112.979, 21.4799, -2.80311, 62.4313, -8.936, -17.4872, -40.7234, -10, 122, -36.7696, 8.25477, 113.266, Policy 4 will be: 58.5267, -31.6647, 30.0674, 6.9183, -55, 57.8496, -8.80569, 88.8504, -93.9836, -10, 75.0503, 16.7004, -13.5176, -88, 4.43159, -7.27797, -28.373, -10, 72.5631, -59.1823, 14.6275, 110.871, 15.9125, 0, -72.9825, -6.07518, -5.35988, -47.9815, -2.03727, -5.01691, 10.7005, 15.726, 101.85, 17.4206, -48.8841, 55.9605, 0.569888, -28.0125, 89.5033, 14.74, -25, 101.8, 40.0058, 122, 28.0271, -54.4747, 12.3604, -12.112, -17.8651, 105.261, -2.94471, 13.0319, 22.1057, 33.0209, 98.257, 35.3271, 0, 50.1468, -10.8521, -21.2502, -53.2728, -10, 122, -48.6757, 11.7471, 122, trial: 0, score: 6 trial: 1, score: 6 Policy 0: 55.5696, -34.131, 18.8973, 3.32634, -55, 48.1349, -10, 81.4974, -69.9765, -10, 68.6485, 17.9864, -6.81554, -79.691, 9.01728, -5.6608, -12.3115, -5.85249, 58.2183, -67.1068, 16.6717, 122, 13.6806, -1.70456, -84.9886, -8.75192, -9.78247, -37.359, 6.21018, -2.47065, -6.26666, 14.6776, 97.7101, 16.4524, -49.038, 46.5423, -0.989, -32.1385, 91.8383, 23.1101, -24.7239, 111.314, 37.7985, 116.82, 34.9741, -47.459, 14.156, -9.68547, -19.8416, 114.072, 5.00619, 18.6107, 26.06, 27.1384, 105.306, 26.0414, -1.74684, 61.9897, -5.34724, -21.4885, -38.1509, -10, 122, -38.4795, 6.19088, 115.991, Average Score: 6 trial: 0, score: 0