New Algorithm, initial policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, trial: 0, score: 4934 trial: 1, score: 4670 trial: 2, score: 4767 trial: 3, score: 4221 trial: 4, score: 4470 Policy 0: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Average Score: 4612.4 trial: 0, score: 4607 trial: 1, score: 4958 trial: 2, score: 4093 trial: 3, score: 4317 trial: 4, score: 3485 Policy 1: 23.9426, -44.4749, -1.85038, -1.47548, -52.2247, 80.9274, 11.3062, 52.9513, -56.6069, -5.01997, 83.0504, 26.0919, -13.6485, -82.9445, -7.94299, -14.0753, 22.6854, 3.57328, 34.811, -64.9161, 10.2359, 122, 20.4768, -13.3272, -88, -6.13805, -10.9292, 2.16476, 3.79428, -5.76243, -43.5798, -1.7148, 106.815, 18.009, -30.7593, 50.9458, -9.18948, -24.7455, 68.2747, 3.67027, -13.2082, 52.69, 0.641675, 122, 4.53807, -28.578, 50.6513, -13.4516, -21.2894, 110.596, -1.7657, -2.27537, 43.3352, 8.99715, 102.863, 40.9948, -16.8465, 8.75268, -15, -22.667, -14.1491, 0.930691, 116.428, -67.5365, -10, 106.143, Average Score: 4292 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 24.9211, -45.951, -5.36249, 0.0718636, -54.8175, 83.2602, 10.044, 54.4497, -52.4255, -2.55733, 84.5533, 26.4228, -21.2928, -88, -1.66035, -13.8494, 18.9695, 9.07468, 32.3478, -65.1732, 3.13487, 120.177, 21.8886, -8.48884, -87.4922, -4.75596, -13.7931, 0.670613, 4.33081, -11.6676, -42.0376, -9.04255, 100.965, 12.4782, -34.5583, 53.7774, -5.6053, -15.9787, 69.9518, 2.69977, -11.1247, 54.1455, 1.68771, 120.268, 4.7312, -28.3845, 56.3942, -9.74809, -13.5278, 113.535, -1.64992, -2.25317, 50.6489, 11.662, 99.3083, 40.1893, -9.80715, 9.39464, -8.60702, -23.9634, -18.7752, 1.70403, 118.472, -62.8263, -3.17779, 104.932, trial: 0, score: 4238 trial: 1, score: 4710 trial: 2, score: 1885 trial: 3, score: 4125 trial: 4, score: 4158 Policy 1: 24.9211, -45.951, -5.36249, 0.0718636, -54.8175, 83.2602, 10.044, 54.4497, -52.4255, -2.55733, 84.5533, 26.4228, -21.2928, -88, -1.66035, -13.8494, 18.9695, 9.07468, 32.3478, -65.1732, 3.13487, 120.177, 21.8886, -8.48884, -87.4922, -4.75596, -13.7931, 0.670613, 4.33081, -11.6676, -42.0376, -9.04255, 100.965, 12.4782, -34.5583, 53.7774, -5.6053, -15.9787, 69.9518, 2.69977, -11.1247, 54.1455, 1.68771, 120.268, 4.7312, -28.3845, 56.3942, -9.74809, -13.5278, 113.535, -1.64992, -2.25317, 50.6489, 11.662, 99.3083, 40.1893, -9.80715, 9.39464, -8.60702, -23.9634, -18.7752, 1.70403, 118.472, -62.8263, -3.17779, 104.932, Average Score: 3823.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 23.1905, -49.4223, -10.744, -3.89812, -55, 80.8355, 8.0258, 54.9532, -51.2198, -6.14925, 75.7154, 28.6671, -20.1846, -82.5079, -3.33008, -14.4935, 18.8405, 4.2145, 33.1995, -59.9583, 7.30216, 122, 24.9844, -14.4277, -88, -2.59077, -6.94061, 2.06426, -2.19975, -2.35674, -43.3592, -4.81, 108.376, 13.3542, -27.4792, 53.0937, -0.92962, -24.7762, 68.4214, 5.3268, -9.28225, 51.8178, -0.721351, 122, 0, -27.3571, 50.6687, -7.04577, -15.4534, 109.079, -6.68205, -3.67121, 50.8061, 8.20377, 106.372, 37.1809, -8.29201, 8.8304, -9.51764, -19.7934, -16.2331, 2.87267, 118.886, -61.1267, -6.92344, 101.651, trial: 0, score: 4366 trial: 1, score: 4061 trial: 2, score: 4509 trial: 3, score: 4253 trial: 4, score: 3646 Policy 1: 23.1905, -49.4223, -10.744, -3.89812, -55, 80.8355, 8.0258, 54.9532, -51.2198, -6.14925, 75.7154, 28.6671, -20.1846, -82.5079, -3.33008, -14.4935, 18.8405, 4.2145, 33.1995, -59.9583, 7.30216, 122, 24.9844, -14.4277, -88, -2.59077, -6.94061, 2.06426, -2.19975, -2.35674, -43.3592, -4.81, 108.376, 13.3542, -27.4792, 53.0937, -0.92962, -24.7762, 68.4214, 5.3268, -9.28225, 51.8178, -0.721351, 122, 0, -27.3571, 50.6687, -7.04577, -15.4534, 109.079, -6.68205, -3.67121, 50.8061, 8.20377, 106.372, 37.1809, -8.29201, 8.8304, -9.51764, -19.7934, -16.2331, 2.87267, 118.886, -61.1267, -6.92344, 101.651, Average Score: 4167 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 21.2402, -47.8623, -1.89619, -5.18497, -47.904, 84.0368, 8.67717, 54.6419, -50.2111, -10, 77.4083, 27.4238, -19.8288, -79.805, -4.78153, -16.3002, 21.4678, -0.582728, 38.4443, -66.4365, 1.02197, 122, 17.5806, -10.0766, -88, -5.86298, -13.3257, 0.632201, 3.72486, -4.74063, -44.0796, -9.52988, 110.161, 14.3881, -35.3381, 54.6716, -8.79982, -20.5705, 68.9352, 5.11994, -9.47361, 53.5388, -2.43318, 121.219, 0.511883, -26.7531, 54.0198, -9.82105, -13.8531, 111.883, -8.57537, 2.11069, 49.5632, 11.8417, 105.101, 33.9433, -11.3401, 10.0304, -15, -17.5339, -13.2551, 6.5265, 117.624, -69.9886, -8.76221, 104.443, trial: 0, score: 2534 trial: 1, score: 4031 trial: 2, score: 2717 trial: 3, score: 1789 trial: 4, score: 3870 Policy 1: 21.2402, -47.8623, -1.89619, -5.18497, -47.904, 84.0368, 8.67717, 54.6419, -50.2111, -10, 77.4083, 27.4238, -19.8288, -79.805, -4.78153, -16.3002, 21.4678, -0.582728, 38.4443, -66.4365, 1.02197, 122, 17.5806, -10.0766, -88, -5.86298, -13.3257, 0.632201, 3.72486, -4.74063, -44.0796, -9.52988, 110.161, 14.3881, -35.3381, 54.6716, -8.79982, -20.5705, 68.9352, 5.11994, -9.47361, 53.5388, -2.43318, 121.219, 0.511883, -26.7531, 54.0198, -9.82105, -13.8531, 111.883, -8.57537, 2.11069, 49.5632, 11.8417, 105.101, 33.9433, -11.3401, 10.0304, -15, -17.5339, -13.2551, 6.5265, 117.624, -69.9886, -8.76221, 104.443, Average Score: 2988.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 19.1205, -48.721, -7.50291, -6.44429, -55, 81.8721, 9.78129, 51.4927, -52.1267, -7.67043, 84.958, 28.8349, -18.5838, -86.7546, -0.276977, -13.6179, 25.8414, 8.37612, 34.0529, -60.0015, 1.41346, 122, 17.67, -13.3618, -83.2761, -7.20304, -14.1263, 0.898362, 0.161639, -3.7093, -34.5416, -0.0521526, 110.358, 17.461, -29.2125, 53.9934, -8.80432, -22.8045, 71.1586, 3.58322, -16.5523, 59.0065, -3.04784, 122, 6.09217, -28.5796, 57.9585, -6.66957, -20.0352, 110.142, -8.34393, -2.52858, 48.7995, 10.8754, 106.619, 41.1409, -10.5272, 7.50024, -12.7344, -14.9191, -14.9076, 4.80808, 120.457, -68.8067, -10, 99.5583, trial: 0, score: 2781 trial: 1, score: 3869 trial: 2, score: 3325 trial: 3, score: 3677 trial: 4, score: 3101 Policy 1: 19.1205, -48.721, -7.50291, -6.44429, -55, 81.8721, 9.78129, 51.4927, -52.1267, -7.67043, 84.958, 28.8349, -18.5838, -86.7546, -0.276977, -13.6179, 25.8414, 8.37612, 34.0529, -60.0015, 1.41346, 122, 17.67, -13.3618, -83.2761, -7.20304, -14.1263, 0.898362, 0.161639, -3.7093, -34.5416, -0.0521526, 110.358, 17.461, -29.2125, 53.9934, -8.80432, -22.8045, 71.1586, 3.58322, -16.5523, 59.0065, -3.04784, 122, 6.09217, -28.5796, 57.9585, -6.66957, -20.0352, 110.142, -8.34393, -2.52858, 48.7995, 10.8754, 106.619, 41.1409, -10.5272, 7.50024, -12.7344, -14.9191, -14.9076, 4.80808, 120.457, -68.8067, -10, 99.5583, Average Score: 3350.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 20.0274, -44.6107, -9.68275, -4.03791, -55, 80.6839, 10.4159, 47.1891, -49.9594, -1.64148, 76.678, 23.7075, -20.279, -87.1718, -5.25829, -10.791, 17.2546, 1.19006, 33.4464, -59.6165, 9.62533, 120.728, 20.558, -7.22954, -88, -7.58602, -14.1939, -1.08995, 0.345605, -3.22222, -42.9654, 0.055658, 109.78, 16.0944, -32.9631, 59.2844, -5.63062, -16.7863, 70.2458, 2.99566, -18.1368, 58.8696, 0.686101, 121.888, 1.70391, -30.4573, 55.4092, -12.5558, -13.4817, 112.287, -3.69422, -4.52674, 49.758, 5.48769, 100.846, 37.991, -15.2552, 12.1005, -14.4781, -18.4941, -15.4861, 0.211187, 116.891, -68.886, -7.78911, 107.02, trial: 0, score: 4445 trial: 1, score: 4893 trial: 2, score: 3933 trial: 3, score: 4383 trial: 4, score: 4118 Policy 1: 20.0274, -44.6107, -9.68275, -4.03791, -55, 80.6839, 10.4159, 47.1891, -49.9594, -1.64148, 76.678, 23.7075, -20.279, -87.1718, -5.25829, -10.791, 17.2546, 1.19006, 33.4464, -59.6165, 9.62533, 120.728, 20.558, -7.22954, -88, -7.58602, -14.1939, -1.08995, 0.345605, -3.22222, -42.9654, 0.055658, 109.78, 16.0944, -32.9631, 59.2844, -5.63062, -16.7863, 70.2458, 2.99566, -18.1368, 58.8696, 0.686101, 121.888, 1.70391, -30.4573, 55.4092, -12.5558, -13.4817, 112.287, -3.69422, -4.52674, 49.758, 5.48769, 100.846, 37.991, -15.2552, 12.1005, -14.4781, -18.4941, -15.4861, 0.211187, 116.891, -68.886, -7.78911, 107.02, Average Score: 4354.4 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 20.1056, -48.7511, -2.62656, 0.536988, -50.911, 87.0524, 9.29314, 48.5022, -56.7838, -9.42921, 77.502, 20.8497, -19.4671, -80.419, -5.49936, -12.9496, 22.4207, 2.60236, 31.3664, -66.7785, 4.99839, 122, 18.5413, -5.7669, -88, -9.13613, -6.06622, -3.59738, 1.57775, -2.83811, -41.1309, -3.59592, 102.529, 11.6402, -35.1741, 49.7431, -6.71771, -17.1617, 70.0876, 1.04784, -8.70869, 55.3604, 2.3039, 120.707, 1.64506, -26.4681, 53.6347, -12.6204, -17.6549, 112.025, -0.534148, -1.64016, 51.4709, 10.9824, 103.971, 32.9529, -13.3777, 10.8759, -8.29294, -23.4091, -16.9409, 5.52464, 122, -66.3846, -6.02655, 107.62, trial: 0, score: 3710 trial: 1, score: 4342 trial: 2, score: 4637 trial: 3, score: 4253 trial: 4, score: 3742 Policy 1: 20.1056, -48.7511, -2.62656, 0.536988, -50.911, 87.0524, 9.29314, 48.5022, -56.7838, -9.42921, 77.502, 20.8497, -19.4671, -80.419, -5.49936, -12.9496, 22.4207, 2.60236, 31.3664, -66.7785, 4.99839, 122, 18.5413, -5.7669, -88, -9.13613, -6.06622, -3.59738, 1.57775, -2.83811, -41.1309, -3.59592, 102.529, 11.6402, -35.1741, 49.7431, -6.71771, -17.1617, 70.0876, 1.04784, -8.70869, 55.3604, 2.3039, 120.707, 1.64506, -26.4681, 53.6347, -12.6204, -17.6549, 112.025, -0.534148, -1.64016, 51.4709, 10.9824, 103.971, 32.9529, -13.3777, 10.8759, -8.29294, -23.4091, -16.9409, 5.52464, 122, -66.3846, -6.02655, 107.62, Average Score: 4136.8 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 20.8781, -53.6856, -7.55567, -4.04241, -54.4442, 83.7771, 7.68886, 47.1165, -58.5336, -1.51974, 82.7183, 26.9259, -16.0653, -80.9289, -7.27157, -6.74584, 18.5172, 2.75816, 39.0733, -67.3646, 7.86908, 117.971, 17.7537, -11.441, -86.9298, -11.4514, -11.3948, 1.61165, 3.36566, -3.61129, -43.6667, -6.16473, 107.111, 9.5358, -28.8466, 51.5034, -2.68632, -17.5642, 72.0767, 2.50071, -15.373, 54.0747, -3.51143, 122, 0, -29.7435, 56.5058, -13.0779, -14.6831, 110.063, -2.0031, -1.36816, 50.8182, 5.62513, 100.25, 40.3586, -10.3986, 7.92532, -9.15335, -15.5526, -18.3633, 3.07121, 116.109, -68.4092, -9.34007, 103.236, trial: 0, score: 3702 trial: 1, score: 3869 trial: 2, score: 1853 trial: 3, score: 3710 trial: 4, score: 3805 Policy 1: 20.8781, -53.6856, -7.55567, -4.04241, -54.4442, 83.7771, 7.68886, 47.1165, -58.5336, -1.51974, 82.7183, 26.9259, -16.0653, -80.9289, -7.27157, -6.74584, 18.5172, 2.75816, 39.0733, -67.3646, 7.86908, 117.971, 17.7537, -11.441, -86.9298, -11.4514, -11.3948, 1.61165, 3.36566, -3.61129, -43.6667, -6.16473, 107.111, 9.5358, -28.8466, 51.5034, -2.68632, -17.5642, 72.0767, 2.50071, -15.373, 54.0747, -3.51143, 122, 0, -29.7435, 56.5058, -13.0779, -14.6831, 110.063, -2.0031, -1.36816, 50.8182, 5.62513, 100.25, 40.3586, -10.3986, 7.92532, -9.15335, -15.5526, -18.3633, 3.07121, 116.109, -68.4092, -9.34007, 103.236, Average Score: 3387.8 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 25.7385, -46.4743, -5.62845, -6.80344, -52.1713, 88.4443, 11.3091, 48.7857, -56.0338, -2.27461, 80.4478, 25.6619, -19.6973, -88, -6.86107, -12.3207, 25.0946, -0.154084, 31.4725, -66.4742, 9.84858, 118.693, 22.0614, -9.72396, -86.0007, -2.44313, -13.551, -6.577, 1.45391, -9.45531, -40.199, -0.713396, 102.266, 13.6999, -35.471, 52.6913, -4.65411, -16.9341, 74.5296, 7.88404, -17.548, 53.7212, 5.6743, 119.604, 0, -22.9229, 55.7635, -9.41666, -21.3418, 114.696, -2.26639, 0.482984, 50.2444, 10.948, 98.5391, 38.0222, -8.70592, 10.3853, -12.0723, -20.7977, -13.1761, 4.09672, 119.628, -64.6453, -6.25239, 107.124, trial: 0, score: 3518 trial: 1, score: 4285 trial: 2, score: 4157 trial: 3, score: 4189 trial: 4, score: 4509 Policy 1: 25.7385, -46.4743, -5.62845, -6.80344, -52.1713, 88.4443, 11.3091, 48.7857, -56.0338, -2.27461, 80.4478, 25.6619, -19.6973, -88, -6.86107, -12.3207, 25.0946, -0.154084, 31.4725, -66.4742, 9.84858, 118.693, 22.0614, -9.72396, -86.0007, -2.44313, -13.551, -6.577, 1.45391, -9.45531, -40.199, -0.713396, 102.266, 13.6999, -35.471, 52.6913, -4.65411, -16.9341, 74.5296, 7.88404, -17.548, 53.7212, 5.6743, 119.604, 0, -22.9229, 55.7635, -9.41666, -21.3418, 114.696, -2.26639, 0.482984, 50.2444, 10.948, 98.5391, 38.0222, -8.70592, 10.3853, -12.0723, -20.7977, -13.1761, 4.09672, 119.628, -64.6453, -6.25239, 107.124, Average Score: 4131.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 22.9292, -48.3542, -4.99807, 0.55518, -51.1421, 86.123, 10.1104, 49.3759, -51.7527, -1.87596, 80.8108, 30.0381, -14.7243, -83.1967, 0.997593, -15.5529, 17.691, 1.22914, 30.6584, -60.0881, 8.86967, 121.043, 25.3291, -14.1687, -84.9753, -2.88678, -5.60053, -1.35294, 5.16293, -6.11621, -37.7115, -7.6573, 102.712, 17.122, -26.3633, 50.3891, -0.375445, -20.1501, 68.35, 3.09295, -9.60667, 57.4984, 0.773369, 122, 3.7437, -30.9614, 54.7496, -8.64798, -20.633, 111.815, -5.58362, -0.67838, 44.3552, 6.97796, 104.375, 36.1588, -10.133, 11.1702, -15, -17.2264, -19.7204, 6.62614, 120.733, -64.31, -10, 106.427, trial: 0, score: 3997 trial: 1, score: 3389 trial: 2, score: 4445 trial: 3, score: 4285 trial: 4, score: 3837 Policy 1: 22.9292, -48.3542, -4.99807, 0.55518, -51.1421, 86.123, 10.1104, 49.3759, -51.7527, -1.87596, 80.8108, 30.0381, -14.7243, -83.1967, 0.997593, -15.5529, 17.691, 1.22914, 30.6584, -60.0881, 8.86967, 121.043, 25.3291, -14.1687, -84.9753, -2.88678, -5.60053, -1.35294, 5.16293, -6.11621, -37.7115, -7.6573, 102.712, 17.122, -26.3633, 50.3891, -0.375445, -20.1501, 68.35, 3.09295, -9.60667, 57.4984, 0.773369, 122, 3.7437, -30.9614, 54.7496, -8.64798, -20.633, 111.815, -5.58362, -0.67838, 44.3552, 6.97796, 104.375, 36.1588, -10.133, 11.1702, -15, -17.2264, -19.7204, 6.62614, 120.733, -64.31, -10, 106.427, Average Score: 3990.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 24.2589, -48.1314, -3.77691, -8.37454, -55, 87.2467, 11.7531, 46.6226, -52.083, -2.24095, 79.2751, 29.6434, -19.1071, -87.6102, 1.49333, -7.3761, 17.8075, 8.18686, 36.4165, -62.0298, 10.887, 121.329, 26.3544, -6.77763, -83.0009, -4.31717, -12.3527, -3.8654, 3.99706, -2.31727, -39.8889, -7.63857, 103.084, 18.9986, -27.2687, 56.0314, -2.3299, -18.9724, 74.9213, 0.372268, -12.1477, 59.0965, 0.27607, 118.145, 5.65721, -24.8551, 49.44, -8.10083, -17.1383, 114.721, -9.51616, -0.47459, 53.2428, 8.23634, 105.289, 37.9651, -16.9962, 10.6062, -11.9413, -16.0151, -18.6422, 5.36827, 122, -65.4907, -8.53782, 107.624, trial: 0, score: 1854 trial: 1, score: 2717 trial: 2, score: 2269 trial: 3, score: 3101 trial: 4, score: 2525 Policy 1: 24.2589, -48.1314, -3.77691, -8.37454, -55, 87.2467, 11.7531, 46.6226, -52.083, -2.24095, 79.2751, 29.6434, -19.1071, -87.6102, 1.49333, -7.3761, 17.8075, 8.18686, 36.4165, -62.0298, 10.887, 121.329, 26.3544, -6.77763, -83.0009, -4.31717, -12.3527, -3.8654, 3.99706, -2.31727, -39.8889, -7.63857, 103.084, 18.9986, -27.2687, 56.0314, -2.3299, -18.9724, 74.9213, 0.372268, -12.1477, 59.0965, 0.27607, 118.145, 5.65721, -24.8551, 49.44, -8.10083, -17.1383, 114.721, -9.51616, -0.47459, 53.2428, 8.23634, 105.289, 37.9651, -16.9962, 10.6062, -11.9413, -16.0151, -18.6422, 5.36827, 122, -65.4907, -8.53782, 107.624, Average Score: 2493.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 23.4368, -46.7469, -6.04888, -2.46282, -48.5053, 90.5559, 7.66636, 49.0556, -56.1643, -4.40109, 76.6904, 24.7402, -22.6489, -85.8836, -1.9278, -11.9951, 20.4891, 6.90768, 32.25, -67.2814, 10.4462, 120.318, 19.5798, -12.6317, -87.3846, -8.19556, -12.6832, -0.545944, -2.595, -2.78229, -35.9729, -6.43092, 110.642, 16.2727, -34.9677, 55.4082, -2.45911, -24.7127, 69.1928, 7.75724, -12.7039, 57.4524, 1.07351, 120.408, 0, -25.0291, 54.698, -12.8808, -19.2147, 106.56, -3.25337, 0.241623, 45.4288, 10.7891, 99.8725, 39.1751, -9.77299, 15.5318, -15, -21.9372, -13.1035, 3.75272, 121.79, -65.3218, -10, 101.549, trial: 0, score: 3293 trial: 1, score: 2718 trial: 2, score: 1655 trial: 3, score: 3389 trial: 4, score: 2813 Policy 1: 23.4368, -46.7469, -6.04888, -2.46282, -48.5053, 90.5559, 7.66636, 49.0556, -56.1643, -4.40109, 76.6904, 24.7402, -22.6489, -85.8836, -1.9278, -11.9951, 20.4891, 6.90768, 32.25, -67.2814, 10.4462, 120.318, 19.5798, -12.6317, -87.3846, -8.19556, -12.6832, -0.545944, -2.595, -2.78229, -35.9729, -6.43092, 110.642, 16.2727, -34.9677, 55.4082, -2.45911, -24.7127, 69.1928, 7.75724, -12.7039, 57.4524, 1.07351, 120.408, 0, -25.0291, 54.698, -12.8808, -19.2147, 106.56, -3.25337, 0.241623, 45.4288, 10.7891, 99.8725, 39.1751, -9.77299, 15.5318, -15, -21.9372, -13.1035, 3.75272, 121.79, -65.3218, -10, 101.549, Average Score: 2773.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 24.1467, -52.8309, -6.92099, -0.473105, -54.2888, 83.9569, 5.58794, 46.8438, -53.6139, -7.97181, 84.179, 25.765, -21.226, -79.6789, -2.454, -13.4345, 18.517, 7.52215, 31.0603, -68.1335, 7.56376, 120.817, 20.45, -6.11659, -88, -5.21334, -13.1831, -2.62152, -2.63149, -6.56351, -36.4188, -2.76692, 105.402, 16.6552, -30.6253, 54.3664, -5.44581, -18.5312, 71.9875, 1.62543, -8.40776, 56.6329, -3.23639, 121.641, 3.16945, -22.7374, 57.4118, -13.6266, -20.2028, 113.873, -9.63454, -1.26523, 44.8686, 3.73587, 100.974, 39.0835, -13.0248, 7.45279, -9.55058, -23.6424, -15.1593, -0.174318, 122, -64.5029, -9.8144, 103.101, trial: 0, score: 1735 trial: 1, score: 3581 trial: 2, score: 1789 trial: 3, score: 1629 trial: 4, score: 1789 Policy 1: 24.1467, -52.8309, -6.92099, -0.473105, -54.2888, 83.9569, 5.58794, 46.8438, -53.6139, -7.97181, 84.179, 25.765, -21.226, -79.6789, -2.454, -13.4345, 18.517, 7.52215, 31.0603, -68.1335, 7.56376, 120.817, 20.45, -6.11659, -88, -5.21334, -13.1831, -2.62152, -2.63149, -6.56351, -36.4188, -2.76692, 105.402, 16.6552, -30.6253, 54.3664, -5.44581, -18.5312, 71.9875, 1.62543, -8.40776, 56.6329, -3.23639, 121.641, 3.16945, -22.7374, 57.4118, -13.6266, -20.2028, 113.873, -9.63454, -1.26523, 44.8686, 3.73587, 100.974, 39.0835, -13.0248, 7.45279, -9.55058, -23.6424, -15.1593, -0.174318, 122, -64.5029, -9.8144, 103.101, Average Score: 2104.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 22.1547, -46.507, -3.27666, -8.53828, -48.329, 83.6956, 11.3686, 53.157, -56.7479, -0.910096, 78.6315, 28.2828, -16.8526, -80.5132, -0.520492, -13.1707, 19.9377, 2.07984, 37.9306, -65.6992, 5.04487, 117.887, 17.5291, -11.1025, -87.6334, -6.86032, -6.62116, -2.67097, -2.53464, -10.5164, -36.7761, -8.22987, 101.061, 15.1464, -30.0387, 49.6311, -8.42849, -19.9318, 74.2403, 6.19639, -15.9567, 54.9987, -1.23556, 122, 4.33267, -29.5036, 53.4993, -8.13345, -21.4214, 112.413, -4.58655, -4.80189, 48.8797, 12.4955, 107.096, 34.3224, -11.4836, 15.9478, -15, -18.7419, -13.9108, 4.57276, 117.926, -63.8413, -6.84897, 98.7588, trial: 0, score: 2941 trial: 1, score: 3965 trial: 2, score: 3996 trial: 3, score: 3808 trial: 4, score: 4246 Policy 1: 22.1547, -46.507, -3.27666, -8.53828, -48.329, 83.6956, 11.3686, 53.157, -56.7479, -0.910096, 78.6315, 28.2828, -16.8526, -80.5132, -0.520492, -13.1707, 19.9377, 2.07984, 37.9306, -65.6992, 5.04487, 117.887, 17.5291, -11.1025, -87.6334, -6.86032, -6.62116, -2.67097, -2.53464, -10.5164, -36.7761, -8.22987, 101.061, 15.1464, -30.0387, 49.6311, -8.42849, -19.9318, 74.2403, 6.19639, -15.9567, 54.9987, -1.23556, 122, 4.33267, -29.5036, 53.4993, -8.13345, -21.4214, 112.413, -4.58655, -4.80189, 48.8797, 12.4955, 107.096, 34.3224, -11.4836, 15.9478, -15, -18.7419, -13.9108, 4.57276, 117.926, -63.8413, -6.84897, 98.7588, Average Score: 3791.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 25.7724, -52.8884, -6.04417, -8.78175, -55, 86.8449, 4.46186, 47.9945, -57.006, -3.60563, 81.4396, 25.3191, -15.6657, -87.7553, -5.52774, -7.42246, 18.4129, 0.804282, 31.826, -63.5727, 10.9519, 119.785, 17.1004, -7.57406, -86.0576, -2.68378, -7.04241, 1.48746, 3.55438, -3.48017, -41.1508, -8.30444, 100.831, 15.434, -26.4402, 57.1431, -1.41813, -17.6095, 67.51, 4.74273, -13.2703, 55.6151, 0.157542, 122, 0.391376, -31.1911, 55.8629, -6.59242, -19.1391, 109.689, -7.60636, -4.30694, 48.4211, 10.7679, 100.119, 33.8348, -11.8595, 11.3203, -10.754, -18.3075, -13.8023, 2.07923, 122, -69.3774, -6.46591, 105.975, trial: 0, score: 1734 trial: 1, score: 2045 trial: 2, score: 1983 trial: 3, score: 3126 trial: 4, score: 2013 Policy 1: 25.7724, -52.8884, -6.04417, -8.78175, -55, 86.8449, 4.46186, 47.9945, -57.006, -3.60563, 81.4396, 25.3191, -15.6657, -87.7553, -5.52774, -7.42246, 18.4129, 0.804282, 31.826, -63.5727, 10.9519, 119.785, 17.1004, -7.57406, -86.0576, -2.68378, -7.04241, 1.48746, 3.55438, -3.48017, -41.1508, -8.30444, 100.831, 15.434, -26.4402, 57.1431, -1.41813, -17.6095, 67.51, 4.74273, -13.2703, 55.6151, 0.157542, 122, 0.391376, -31.1911, 55.8629, -6.59242, -19.1391, 109.689, -7.60636, -4.30694, 48.4211, 10.7679, 100.119, 33.8348, -11.8595, 11.3203, -10.754, -18.3075, -13.8023, 2.07923, 122, -69.3774, -6.46591, 105.975, Average Score: 2180.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 18.6383, -48.2864, -1.11949, -7.03651, -51.6896, 83.9901, 13.5588, 54.1491, -51.7606, -2.47892, 75.4447, 23.3047, -21.3445, -83.9062, -4.10754, -7.98037, 23.1887, 6.446, 38.0292, -67.3245, 5.20864, 122, 26.7626, -5.46893, -88, -3.80896, -13.5514, 1.25852, 6.6453, -3.66913, -36.7365, -4.03303, 101.843, 16.8362, -30.405, 55.2195, -3.88869, -18.9414, 74.9497, 3.33674, -9.89048, 54.5839, 3.24173, 120.998, 0, -28.2876, 55.961, -8.4231, -15.5077, 108.404, -8.09314, 1.04738, 44.6451, 10.6189, 102.535, 41.0473, -15.5785, 8.06169, -11.5632, -20.6649, -12.7709, 1.55624, 122, -65.4558, -10, 103.356, trial: 0, score: 3318 trial: 1, score: 1981 trial: 2, score: 2141 trial: 3, score: 2429 trial: 4, score: 1629 Policy 1: 18.6383, -48.2864, -1.11949, -7.03651, -51.6896, 83.9901, 13.5588, 54.1491, -51.7606, -2.47892, 75.4447, 23.3047, -21.3445, -83.9062, -4.10754, -7.98037, 23.1887, 6.446, 38.0292, -67.3245, 5.20864, 122, 26.7626, -5.46893, -88, -3.80896, -13.5514, 1.25852, 6.6453, -3.66913, -36.7365, -4.03303, 101.843, 16.8362, -30.405, 55.2195, -3.88869, -18.9414, 74.9497, 3.33674, -9.89048, 54.5839, 3.24173, 120.998, 0, -28.2876, 55.961, -8.4231, -15.5077, 108.404, -8.09314, 1.04738, 44.6451, 10.6189, 102.535, 41.0473, -15.5785, 8.06169, -11.5632, -20.6649, -12.7709, 1.55624, 122, -65.4558, -10, 103.356, Average Score: 2299.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 19.9502, -51.0768, -9.5326, -0.423233, -55, 80.9332, 4.40623, 53.8338, -50.3448, -2.07255, 76.917, 29.715, -22.8109, -84.4137, -1.97004, -10.6198, 16.488, 9.08468, 38.1944, -60.5476, 4.40555, 122, 19.1915, -6.98885, -88, -6.28207, -12.7296, 2.41454, 2.45828, -11.3555, -34.7478, -3.56827, 103.605, 9.63264, -33.6327, 58.3357, -7.89288, -20.3084, 67.5684, 6.762, -13.3184, 51.3511, -1.33802, 122, 5.16776, -24.4147, 56.674, -13.0648, -18.8206, 115.282, -7.81938, -3.56927, 45.4558, 4.56593, 102.399, 37.3024, -9.10249, 7.71328, -15, -19.6803, -21.8012, 4.48804, 121.577, -68.8317, -8.36287, 102.088, trial: 0, score: 2272 trial: 1, score: 2039 trial: 2, score: 1693 trial: 3, score: 1789 trial: 4, score: 2079 Policy 1: 19.9502, -51.0768, -9.5326, -0.423233, -55, 80.9332, 4.40623, 53.8338, -50.3448, -2.07255, 76.917, 29.715, -22.8109, -84.4137, -1.97004, -10.6198, 16.488, 9.08468, 38.1944, -60.5476, 4.40555, 122, 19.1915, -6.98885, -88, -6.28207, -12.7296, 2.41454, 2.45828, -11.3555, -34.7478, -3.56827, 103.605, 9.63264, -33.6327, 58.3357, -7.89288, -20.3084, 67.5684, 6.762, -13.3184, 51.3511, -1.33802, 122, 5.16776, -24.4147, 56.674, -13.0648, -18.8206, 115.282, -7.81938, -3.56927, 45.4558, 4.56593, 102.399, 37.3024, -9.10249, 7.71328, -15, -19.6803, -21.8012, 4.48804, 121.577, -68.8317, -8.36287, 102.088, Average Score: 1974.4 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 21.2601, -50.9533, -4.39833, 0.306567, -53.4892, 87.3072, 9.20115, 46.7623, -51.9546, -3.82284, 80.7117, 25.457, -20.679, -80.9603, 1.02133, -10.5996, 18.4794, 3.22992, 34.3943, -60.036, 1.70215, 119.91, 18.5453, -13.8299, -86.3597, -5.0198, -6.49291, -1.46473, -0.539748, -2.39422, -40.1954, -0.755594, 109.041, 14.2721, -27.3244, 59.4269, -4.85696, -16.3048, 70.2915, 4.5252, -9.60155, 55.1012, 6.12054, 122, 0, -24.6406, 53.6415, -7.29005, -21.2177, 110.208, -2.7305, 0.67878, 44.1905, 12.6484, 98.9845, 32.8894, -16.4063, 15.7088, -15, -14.6255, -15.4655, 8.69847, 117.821, -61.9761, -6.48831, 107.635, trial: 0, score: 1759 trial: 1, score: 3422 trial: 2, score: 4158 trial: 3, score: 3166 trial: 4, score: 2717 Policy 1: 21.2601, -50.9533, -4.39833, 0.306567, -53.4892, 87.3072, 9.20115, 46.7623, -51.9546, -3.82284, 80.7117, 25.457, -20.679, -80.9603, 1.02133, -10.5996, 18.4794, 3.22992, 34.3943, -60.036, 1.70215, 119.91, 18.5453, -13.8299, -86.3597, -5.0198, -6.49291, -1.46473, -0.539748, -2.39422, -40.1954, -0.755594, 109.041, 14.2721, -27.3244, 59.4269, -4.85696, -16.3048, 70.2915, 4.5252, -9.60155, 55.1012, 6.12054, 122, 0, -24.6406, 53.6415, -7.29005, -21.2177, 110.208, -2.7305, 0.67878, 44.1905, 12.6484, 98.9845, 32.8894, -16.4063, 15.7088, -15, -14.6255, -15.4655, 8.69847, 117.821, -61.9761, -6.48831, 107.635, Average Score: 3044.4 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 20.6433, -50.2578, -9.965, 0.178585, -50.2227, 84.1204, 6.18082, 53.8329, -54.9641, -6.56599, 83.5682, 25.5752, -19.4832, -88, -7.25729, -7.70161, 17.3234, 4.64351, 32.1695, -63.8006, 8.0228, 119.865, 21.5924, -10.8282, -85.0844, -10.6032, -14.2326, 1.15527, 1.03553, -11.7945, -35.6111, -8.62619, 103.517, 11.466, -31.7505, 53.0114, -1.46164, -19.0854, 75.1196, 3.67989, -15.2922, 56.4268, 2.26846, 119.096, 3.90113, -24.043, 51.1663, -7.2922, -17.8929, 106.241, -1.83226, -1.53626, 51.4404, 8.85935, 99.9577, 35.4931, -11.3219, 11.3732, -13.4863, -14.323, -12.1258, 5.67099, 115.53, -62.6986, -5.18092, 101.236, trial: 0, score: 3900 trial: 1, score: 1820 trial: 2, score: 1757 trial: 3, score: 3775 trial: 4, score: 1629 Policy 1: 20.6433, -50.2578, -9.965, 0.178585, -50.2227, 84.1204, 6.18082, 53.8329, -54.9641, -6.56599, 83.5682, 25.5752, -19.4832, -88, -7.25729, -7.70161, 17.3234, 4.64351, 32.1695, -63.8006, 8.0228, 119.865, 21.5924, -10.8282, -85.0844, -10.6032, -14.2326, 1.15527, 1.03553, -11.7945, -35.6111, -8.62619, 103.517, 11.466, -31.7505, 53.0114, -1.46164, -19.0854, 75.1196, 3.67989, -15.2922, 56.4268, 2.26846, 119.096, 3.90113, -24.043, 51.1663, -7.2922, -17.8929, 106.241, -1.83226, -1.53626, 51.4404, 8.85935, 99.9577, 35.4931, -11.3219, 11.3732, -13.4863, -14.323, -12.1258, 5.67099, 115.53, -62.6986, -5.18092, 101.236, Average Score: 2576.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 22.8314, -51.7755, -2.48724, -0.424025, -51.8958, 88.4991, 6.57793, 55.1716, -49.6087, -2.99926, 83.784, 21.7613, -20.4341, -87.5478, 1.49061, -16.2911, 23.8817, 1.39632, 37.5315, -59.9739, 10.5727, 122, 18.5781, -6.48049, -84.7622, -7.9412, -11.2105, 0.059934, 0.670992, -5.50661, -39.8151, -6.44381, 109.766, 15.3027, -29.2366, 56.7141, -0.344508, -18.6607, 69.5605, 0.0792047, -11.7653, 51.18, -0.564808, 118.374, 1.58071, -28.2971, 48.9927, -12.7799, -12.8453, 109.81, -3.31329, 0.695858, 53.2444, 4.42983, 100.806, 36.4165, -13.7251, 12.3031, -14.0675, -19.4835, -12.1781, 9.31202, 122, -68.3579, -3.30532, 107.728, trial: 0, score: 1598 trial: 1, score: 1782 trial: 2, score: 1758 trial: 3, score: 2775 trial: 4, score: 3645 Policy 1: 22.8314, -51.7755, -2.48724, -0.424025, -51.8958, 88.4991, 6.57793, 55.1716, -49.6087, -2.99926, 83.784, 21.7613, -20.4341, -87.5478, 1.49061, -16.2911, 23.8817, 1.39632, 37.5315, -59.9739, 10.5727, 122, 18.5781, -6.48049, -84.7622, -7.9412, -11.2105, 0.059934, 0.670992, -5.50661, -39.8151, -6.44381, 109.766, 15.3027, -29.2366, 56.7141, -0.344508, -18.6607, 69.5605, 0.0792047, -11.7653, 51.18, -0.564808, 118.374, 1.58071, -28.2971, 48.9927, -12.7799, -12.8453, 109.81, -3.31329, 0.695858, 53.2444, 4.42983, 100.806, 36.4165, -13.7251, 12.3031, -14.0675, -19.4835, -12.1781, 9.31202, 122, -68.3579, -3.30532, 107.728, Average Score: 2311.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 27.21, -45.1367, -1.32716, -4.0658, -55, 82.1937, 6.83729, 46.7142, -58.3977, -8.81989, 75.8214, 28.2787, -16.3242, -83.4742, -3.98981, -8.23742, 19.7726, -0.179379, 32.6862, -66.1753, 1.63646, 122, 24.9874, -11.7403, -86.6372, -6.12471, -7.26632, -1.6218, 4.18258, -10.4253, -42.4245, -1.90691, 106.795, 18.4301, -34.7082, 50.8486, -2.28631, -15.9977, 69.8728, 2.73574, -12.3372, 51.52, -0.164627, 122, 0, -31.1746, 54.821, -10.8421, -11.9087, 109.572, -3.97223, -6.49686, 46.0889, 4.35697, 107.495, 36.2949, -11.9736, 10.5843, -15, -18.1617, -16.7292, 2.95892, 114.534, -64.1292, -4.7174, 105.57, trial: 0, score: 4029 trial: 1, score: 4765 trial: 2, score: 4125 trial: 3, score: 3933 trial: 4, score: 4349 Policy 1: 27.21, -45.1367, -1.32716, -4.0658, -55, 82.1937, 6.83729, 46.7142, -58.3977, -8.81989, 75.8214, 28.2787, -16.3242, -83.4742, -3.98981, -8.23742, 19.7726, -0.179379, 32.6862, -66.1753, 1.63646, 122, 24.9874, -11.7403, -86.6372, -6.12471, -7.26632, -1.6218, 4.18258, -10.4253, -42.4245, -1.90691, 106.795, 18.4301, -34.7082, 50.8486, -2.28631, -15.9977, 69.8728, 2.73574, -12.3372, 51.52, -0.164627, 122, 0, -31.1746, 54.821, -10.8421, -11.9087, 109.572, -3.97223, -6.49686, 46.0889, 4.35697, 107.495, 36.2949, -11.9736, 10.5843, -15, -18.1617, -16.7292, 2.95892, 114.534, -64.1292, -4.7174, 105.57, Average Score: 4240.2 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 23.6962, -46.2948, -10.1011, -2.85684, -48.2235, 87.7608, 13.482, 51.6572, -53.0295, -4.86659, 79.1121, 22.1065, -21.7805, -81.2383, -6.81419, -14.6271, 24.9975, 5.67882, 33.8058, -65.8161, 5.0743, 121.304, 20.5516, -11.4206, -88, -4.01428, -8.21808, 1.85801, -2.64842, -11.4073, -41.1852, -2.19869, 103.493, 14.127, -29.544, 51.6438, -8.95827, -15.1792, 73.991, 5.01409, -11.5013, 51.8644, 5.99194, 119.418, 5.79497, -32.4217, 50.5312, -11.7828, -16.039, 113.703, -3.56673, 0.175219, 48.8047, 10.2642, 105.609, 34.3698, -8.47778, 8.70326, -14.482, -22.0135, -18.7531, 6.25125, 115.611, -63.0822, -10, 104.324, trial: 0, score: 4221 trial: 1, score: 4982 trial: 2, score: 3197 trial: 3, score: 4350 trial: 4, score: 4735 Policy 1: 23.6962, -46.2948, -10.1011, -2.85684, -48.2235, 87.7608, 13.482, 51.6572, -53.0295, -4.86659, 79.1121, 22.1065, -21.7805, -81.2383, -6.81419, -14.6271, 24.9975, 5.67882, 33.8058, -65.8161, 5.0743, 121.304, 20.5516, -11.4206, -88, -4.01428, -8.21808, 1.85801, -2.64842, -11.4073, -41.1852, -2.19869, 103.493, 14.127, -29.544, 51.6438, -8.95827, -15.1792, 73.991, 5.01409, -11.5013, 51.8644, 5.99194, 119.418, 5.79497, -32.4217, 50.5312, -11.7828, -16.039, 113.703, -3.56673, 0.175219, 48.8047, 10.2642, 105.609, 34.3698, -8.47778, 8.70326, -14.482, -22.0135, -18.7531, 6.25125, 115.611, -63.0822, -10, 104.324, Average Score: 4297 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 26.3265, -53.0385, -1.44406, -6.31732, -52.8569, 85.1089, 4.56515, 53.6885, -54.2553, -6.36653, 82.8478, 22.4724, -23.4284, -84.4812, -5.13024, -8.5607, 21.5022, 8.48608, 37.4892, -60.2785, 4.59103, 122, 21.0129, -9.59442, -85.1161, -4.91862, -11.398, -0.169052, 3.9907, -3.27752, -36.6079, -2.30417, 107.997, 16.3003, -32.5071, 52.5076, -3.54551, -17.0805, 70.6311, 1.12489, -14.9937, 51.1046, 4.86478, 118.214, 3.69445, -30.8772, 58.0341, -11.6842, -11.9776, 109.212, -6.96902, -6.22127, 49.8536, 11.7648, 99.8943, 36.3626, -13.8171, 15.4755, -9.09127, -18.0625, -12.4315, 2.32819, 120.65, -63.576, -10, 100.78, trial: 0, score: 4326 trial: 1, score: 4413 trial: 2, score: 2269 trial: 3, score: 4093 trial: 4, score: 4509 Policy 1: 26.3265, -53.0385, -1.44406, -6.31732, -52.8569, 85.1089, 4.56515, 53.6885, -54.2553, -6.36653, 82.8478, 22.4724, -23.4284, -84.4812, -5.13024, -8.5607, 21.5022, 8.48608, 37.4892, -60.2785, 4.59103, 122, 21.0129, -9.59442, -85.1161, -4.91862, -11.398, -0.169052, 3.9907, -3.27752, -36.6079, -2.30417, 107.997, 16.3003, -32.5071, 52.5076, -3.54551, -17.0805, 70.6311, 1.12489, -14.9937, 51.1046, 4.86478, 118.214, 3.69445, -30.8772, 58.0341, -11.6842, -11.9776, 109.212, -6.96902, -6.22127, 49.8536, 11.7648, 99.8943, 36.3626, -13.8171, 15.4755, -9.09127, -18.0625, -12.4315, 2.32819, 120.65, -63.576, -10, 100.78, Average Score: 3922 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 21.694, -52.4358, -9.48614, -6.35682, -54.1261, 80.7601, 5.7816, 53.859, -58.6293, -4.96834, 85.0207, 27.0057, -17.5144, -83.3639, -2.32144, -13.2289, 22.0769, 8.19653, 34.9113, -60.8096, 6.30147, 122, 24.3042, -8.8241, -83.2424, -9.95612, -12.9348, -5.25795, -1.0299, -2.55744, -39.2876, -7.74507, 106.36, 10.0845, -26.2555, 56.7305, -5.84973, -17.1263, 74.2749, 3.3036, -8.69122, 53.8155, 0.475084, 122, 4.13653, -28.0008, 50.2021, -11.2699, -13.3331, 109.338, -9.33519, 2.08623, 48.3583, 5.17372, 101.756, 33.5686, -17.8642, 14.2475, -14.2463, -22.1956, -13.9276, 0.322091, 120.14, -70.4899, -5.1183, 104.472, trial: 0, score: 2079 trial: 1, score: 1661 trial: 2, score: 1918 trial: 3, score: 1981 trial: 4, score: 2175 Policy 1: 21.694, -52.4358, -9.48614, -6.35682, -54.1261, 80.7601, 5.7816, 53.859, -58.6293, -4.96834, 85.0207, 27.0057, -17.5144, -83.3639, -2.32144, -13.2289, 22.0769, 8.19653, 34.9113, -60.8096, 6.30147, 122, 24.3042, -8.8241, -83.2424, -9.95612, -12.9348, -5.25795, -1.0299, -2.55744, -39.2876, -7.74507, 106.36, 10.0845, -26.2555, 56.7305, -5.84973, -17.1263, 74.2749, 3.3036, -8.69122, 53.8155, 0.475084, 122, 4.13653, -28.0008, 50.2021, -11.2699, -13.3331, 109.338, -9.33519, 2.08623, 48.3583, 5.17372, 101.756, 33.5686, -17.8642, 14.2475, -14.2463, -22.1956, -13.9276, 0.322091, 120.14, -70.4899, -5.1183, 104.472, Average Score: 1962.8 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 25.3663, -51.896, -10.7322, -6.69748, -54.5675, 89.1927, 8.59923, 55.2655, -50.4126, -0.959373, 80.0723, 20.6077, -23.2714, -84.9929, -5.2473, -14.9325, 26.0452, 2.04046, 37.5626, -62.9842, 1.84037, 122, 18.826, -11.6165, -83.001, -11.4081, -10.2395, 2.1615, 4.34234, -4.54134, -41.299, -9.7237, 105.758, 19.0729, -34.6984, 50.4619, -2.52445, -24.3512, 68.6195, 0.183204, -14.7264, 51.5503, 5.24813, 122, 1.92948, -29.71, 51.7634, -7.31393, -17.2799, 109.11, -8.46706, 1.26038, 43.7012, 6.27974, 103.514, 35.7504, -12.0579, 13.3647, -13.0677, -17.1237, -19.3711, -0.222704, 116.836, -61.6089, -9.05787, 104.47, trial: 0, score: 2621 trial: 1, score: 4349 trial: 2, score: 4445 trial: 3, score: 3165 trial: 4, score: 4317 Policy 1: 25.3663, -51.896, -10.7322, -6.69748, -54.5675, 89.1927, 8.59923, 55.2655, -50.4126, -0.959373, 80.0723, 20.6077, -23.2714, -84.9929, -5.2473, -14.9325, 26.0452, 2.04046, 37.5626, -62.9842, 1.84037, 122, 18.826, -11.6165, -83.001, -11.4081, -10.2395, 2.1615, 4.34234, -4.54134, -41.299, -9.7237, 105.758, 19.0729, -34.6984, 50.4619, -2.52445, -24.3512, 68.6195, 0.183204, -14.7264, 51.5503, 5.24813, 122, 1.92948, -29.71, 51.7634, -7.31393, -17.2799, 109.11, -8.46706, 1.26038, 43.7012, 6.27974, 103.514, 35.7504, -12.0579, 13.3647, -13.0677, -17.1237, -19.3711, -0.222704, 116.836, -61.6089, -9.05787, 104.47, Average Score: 3779.4 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 24.9999, -47.9501, -7.71937, -7.98666, -50.1475, 82.7905, 12.4528, 53.7761, -58.1199, -5.11696, 80.1044, 30.0693, -15.9266, -82.8875, -6.58741, -14.5439, 21.7221, 0.52087, 33.6928, -61.3596, 6.80832, 117.99, 18.1962, -14.295, -88, -8.88942, -9.1533, -6.69501, 6.10852, -6.49488, -41.7887, -7.85137, 110.198, 18.2565, -35.2622, 59.0569, -8.30118, -16.9365, 71.9016, 3.33082, -16.038, 59.9493, 5.40262, 120.49, 3.1032, -29.0439, 49.8431, -10.7767, -18.6314, 108.47, -7.20996, -3.73794, 47.6706, 3.91409, 105.181, 39.4163, -9.15513, 16.2833, -13.2277, -21.6567, -12.884, 0.606591, 116.967, -69.8296, -10, 105.758, trial: 0, score: 4925 trial: 1, score: 4831 trial: 2, score: 4030 trial: 3, score: 1757 trial: 4, score: 3775 Policy 1: 24.9999, -47.9501, -7.71937, -7.98666, -50.1475, 82.7905, 12.4528, 53.7761, -58.1199, -5.11696, 80.1044, 30.0693, -15.9266, -82.8875, -6.58741, -14.5439, 21.7221, 0.52087, 33.6928, -61.3596, 6.80832, 117.99, 18.1962, -14.295, -88, -8.88942, -9.1533, -6.69501, 6.10852, -6.49488, -41.7887, -7.85137, 110.198, 18.2565, -35.2622, 59.0569, -8.30118, -16.9365, 71.9016, 3.33082, -16.038, 59.9493, 5.40262, 120.49, 3.1032, -29.0439, 49.8431, -10.7767, -18.6314, 108.47, -7.20996, -3.73794, 47.6706, 3.91409, 105.181, 39.4163, -9.15513, 16.2833, -13.2277, -21.6567, -12.884, 0.606591, 116.967, -69.8296, -10, 105.758, Average Score: 3863.6 --------------------------------- New Iteration Current Best Policy: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Current Best Policy Score: 4612.4 Policy 0 will be: 23.1224, -49.2517, -5.98653, -4.27761, -52.6633, 85.6147, 8.98929, 51.0088, -54.4447, -5.80674, 80.107, 25.1688, -18.6368, -84.5136, -2.96202, -11.5002, 21.0803, 4.28637, 34.4619, -63.9083, 6.01989, 122, 21.8809, -9.66904, -88, -6.9221, -9.36704, -2.15887, 2.27538, -6.94946, -39.3566, -4.76196, 105.807, 14.0877, -30.5635, 54.5137, -5.10134, -20.1661, 72.0425, 3.286, -13.2529, 55.8711, 1.2701, 122, 1.80712, -27.6887, 53.2648, -10.4123, -16.4368, 111.206, -5.40436, -2.74324, 48.3031, 8.47706, 102.93, 37.3671, -12.9685, 11.6231, -12.7972, -19.0221, -16.8977, 4.673, 119.342, -65.5067, -6.79058, 102.866, Policy 1 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, trial: 0, score: 4406 trial: 1, score: 5183 trial: 2, score: 4669 trial: 3, score: 4637 trial: 4, score: 4989 Policy 1: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Average Score: 4776.8 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 25.2848, -47.734, -0.312813, -1.41625, -50.1294, 79.0094, 10.1656, 48.0016, -52.4151, -10, 74.7071, 24.6976, -18.6003, -84.9237, -3.61341, -9.86891, 24.1032, 2.43864, 35.9676, -61.4893, 1.4219, 117.213, 22.8568, -3.69389, -88, -14.4528, -3.438, -3.40805, -1.34244, -5.73855, -43.2016, -2.13669, 107.935, 20.5609, -31.1239, 57.0044, 1.84304, -21.9731, 68.7344, 3.14314, -14.5515, 55.5563, -0.128665, 118.008, 2.63762, -28.3415, 57.2171, -15, -16.8006, 110.434, -4.02559, 1.2435, 45.8408, 6.66925, 103.942, 32.2166, -8.67021, 12.6784, -11.4971, -19.8169, -14.5066, 4.55202, 112.425, -72.7025, -10, 110.293, trial: 0, score: 4381 trial: 1, score: 4925 trial: 2, score: 4893 trial: 3, score: 4384 trial: 4, score: 4566 Policy 1: 25.2848, -47.734, -0.312813, -1.41625, -50.1294, 79.0094, 10.1656, 48.0016, -52.4151, -10, 74.7071, 24.6976, -18.6003, -84.9237, -3.61341, -9.86891, 24.1032, 2.43864, 35.9676, -61.4893, 1.4219, 117.213, 22.8568, -3.69389, -88, -14.4528, -3.438, -3.40805, -1.34244, -5.73855, -43.2016, -2.13669, 107.935, 20.5609, -31.1239, 57.0044, 1.84304, -21.9731, 68.7344, 3.14314, -14.5515, 55.5563, -0.128665, 118.008, 2.63762, -28.3415, 57.2171, -15, -16.8006, 110.434, -4.02559, 1.2435, 45.8408, 6.66925, 103.942, 32.2166, -8.67021, 12.6784, -11.4971, -19.8169, -14.5066, 4.55202, 112.425, -72.7025, -10, 110.293, Average Score: 4629.8 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 22.8663, -52.1534, -5.388, -7.77952, -55, 80.4646, 13.5655, 51.2651, -47.4755, -4.31764, 71.5553, 26.2031, -15.7122, -88, 4.68934, -6.63224, 20.3158, 2.07113, 37.5269, -57.7725, 9.57407, 116.324, 25.1544, -8.58727, -87.4342, -11.3219, -3, -2.05603, 1.37557, -11.2126, -44.0895, -5.79727, 108.916, 16.7474, -30.4007, 56.8901, -6.19511, -16.368, 70.7256, 3.16906, -9.81838, 57.9486, 8.69262, 115.888, 0, -33.2114, 54.5142, -8.23176, -10.4808, 109.39, 4.37044, 0.0294915, 46.7281, 6.18809, 102.415, 30.2986, -5.34551, 5.51567, -14.9004, -24.0149, -12.7442, 6.50811, 115.64, -71.9269, -10, 101.36, trial: 0, score: 4765 trial: 1, score: 4061 trial: 2, score: 1789 trial: 3, score: 4095 trial: 4, score: 2493 Policy 1: 22.8663, -52.1534, -5.388, -7.77952, -55, 80.4646, 13.5655, 51.2651, -47.4755, -4.31764, 71.5553, 26.2031, -15.7122, -88, 4.68934, -6.63224, 20.3158, 2.07113, 37.5269, -57.7725, 9.57407, 116.324, 25.1544, -8.58727, -87.4342, -11.3219, -3, -2.05603, 1.37557, -11.2126, -44.0895, -5.79727, 108.916, 16.7474, -30.4007, 56.8901, -6.19511, -16.368, 70.7256, 3.16906, -9.81838, 57.9486, 8.69262, 115.888, 0, -33.2114, 54.5142, -8.23176, -10.4808, 109.39, 4.37044, 0.0294915, 46.7281, 6.18809, 102.415, 30.2986, -5.34551, 5.51567, -14.9004, -24.0149, -12.7442, 6.50811, 115.64, -71.9269, -10, 101.36, Average Score: 3440.6 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 30.955, -53.2674, -2.8941, -7.3444, -50.1475, 80.621, 11.2768, 46.9959, -46.3237, -8.09984, 80.3183, 18.5377, -14.0871, -88, 2.31357, -11.3308, 22.8734, -1.31785, 32.9787, -61.4221, 4.26745, 121.383, 22.2813, -4.56135, -88, -7.75305, -7.00258, -5.08228, 0.658642, -9.7434, -43.9885, -1.84014, 108.963, 14.5618, -38.0535, 53.3027, 1.82155, -24.5822, 75.2995, 1.35994, -15.3951, 56.4219, 6.16157, 122, 3.86784, -28.5073, 53.1341, -11.214, -13.3554, 113.742, -2.23308, 4.18384, 46.1789, 10.5391, 98.0555, 37.3495, -6.00749, 10.8359, -10.6527, -20.4754, -9.83584, 2.90093, 118.302, -69.4659, -10, 102.148, trial: 0, score: 2365 trial: 1, score: 1629 trial: 2, score: 2526 trial: 3, score: 3549 trial: 4, score: 2167 Policy 1: 30.955, -53.2674, -2.8941, -7.3444, -50.1475, 80.621, 11.2768, 46.9959, -46.3237, -8.09984, 80.3183, 18.5377, -14.0871, -88, 2.31357, -11.3308, 22.8734, -1.31785, 32.9787, -61.4221, 4.26745, 121.383, 22.2813, -4.56135, -88, -7.75305, -7.00258, -5.08228, 0.658642, -9.7434, -43.9885, -1.84014, 108.963, 14.5618, -38.0535, 53.3027, 1.82155, -24.5822, 75.2995, 1.35994, -15.3951, 56.4219, 6.16157, 122, 3.86784, -28.5073, 53.1341, -11.214, -13.3554, 113.742, -2.23308, 4.18384, 46.1789, 10.5391, 98.0555, 37.3495, -6.00749, 10.8359, -10.6527, -20.4754, -9.83584, 2.90093, 118.302, -69.4659, -10, 102.148, Average Score: 2447.2 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 31.1118, -47.0438, 1.8206, -7.04933, -55, 79.327, 12.7391, 53.9335, -52.3916, -9.57603, 78.5754, 20.4117, -12.5862, -85.261, -0.266904, -8.80004, 23.9984, 0.869646, 37.405, -58.1252, 3.88717, 122, 23.9939, -2.26511, -83.1188, -14.023, -10.6615, -2.98948, 0.475666, -8.26828, -40.9206, 1.49932, 105.563, 15.4115, -32.7538, 60.7992, 1.84029, -19.9087, 74.2136, 3.97387, -17.362, 55.8979, 3.05457, 122, 0.18107, -26.3309, 55.4103, -13.8357, -12.3226, 110.748, -3.97488, -5.53389, 50.0932, 11.5384, 103.796, 32.1443, -8.48114, 4.53814, -14.1907, -17.4641, -11.6202, 7.46192, 117.711, -65.4712, -5.87337, 107.559, trial: 0, score: 3239 trial: 1, score: 3741 trial: 2, score: 4095 trial: 3, score: 3838 trial: 4, score: 2845 Policy 1: 31.1118, -47.0438, 1.8206, -7.04933, -55, 79.327, 12.7391, 53.9335, -52.3916, -9.57603, 78.5754, 20.4117, -12.5862, -85.261, -0.266904, -8.80004, 23.9984, 0.869646, 37.405, -58.1252, 3.88717, 122, 23.9939, -2.26511, -83.1188, -14.023, -10.6615, -2.98948, 0.475666, -8.26828, -40.9206, 1.49932, 105.563, 15.4115, -32.7538, 60.7992, 1.84029, -19.9087, 74.2136, 3.97387, -17.362, 55.8979, 3.05457, 122, 0.18107, -26.3309, 55.4103, -13.8357, -12.3226, 110.748, -3.97488, -5.53389, 50.0932, 11.5384, 103.796, 32.1443, -8.48114, 4.53814, -14.1907, -17.4641, -11.6202, 7.46192, 117.711, -65.4712, -5.87337, 107.559, Average Score: 3551.6 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 28.1703, -51.0481, -1.7367, -10.2703, -52.4915, 79.9448, 11.0412, 52.305, -50.9691, -2.24501, 78.5445, 26.7647, -13.6476, -84.2964, 4.00857, -3.84293, 20.1757, 2.55935, 33.4734, -61.5737, 1.75204, 121.045, 18.9802, -6.23301, -85.7092, -15, -8.36425, -5.95499, 5.08657, -10.7715, -34.8134, 0.491553, 104.629, 21.3494, -36.9914, 54.4963, -0.557307, -20.8371, 66.6336, -1.00343, -17.1443, 62.326, 7.40894, 121.124, 0, -28.7064, 55.8583, -15, -11.9798, 109.175, 3.69511, -5.20152, 48.6166, 11.2786, 96.1619, 37.076, -8.69773, 6.29093, -10.5102, -15.684, -14.1177, 10.4036, 116.326, -65.1969, -9.83232, 105.919, trial: 0, score: 3743 trial: 1, score: 4349 trial: 2, score: 4382 trial: 3, score: 4031 trial: 4, score: 4348 Policy 1: 28.1703, -51.0481, -1.7367, -10.2703, -52.4915, 79.9448, 11.0412, 52.305, -50.9691, -2.24501, 78.5445, 26.7647, -13.6476, -84.2964, 4.00857, -3.84293, 20.1757, 2.55935, 33.4734, -61.5737, 1.75204, 121.045, 18.9802, -6.23301, -85.7092, -15, -8.36425, -5.95499, 5.08657, -10.7715, -34.8134, 0.491553, 104.629, 21.3494, -36.9914, 54.4963, -0.557307, -20.8371, 66.6336, -1.00343, -17.1443, 62.326, 7.40894, 121.124, 0, -28.7064, 55.8583, -15, -11.9798, 109.175, 3.69511, -5.20152, 48.6166, 11.2786, 96.1619, 37.076, -8.69773, 6.29093, -10.5102, -15.684, -14.1177, 10.4036, 116.326, -65.1969, -9.83232, 105.919, Average Score: 4170.6 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 31.8391, -46.4136, -3.8153, -4.39901, -55, 79.3182, 6.9818, 54.498, -49.5654, -9.16971, 80.5625, 22.6957, -10.0668, -85.3994, -4.01382, -12.1759, 18.1362, 4.8129, 31.959, -61.0091, 1.68675, 122, 18.1782, 0, -85.5379, -12.1367, -6.82792, -2.82571, 0.167852, -10.3439, -42.1106, -3.44522, 102.767, 11.7375, -35.9693, 61.6819, -1.31255, -18.5769, 70.9423, 2.01348, -13.3032, 61.3699, 1.37395, 122, 3.4954, -26.8834, 55.1983, -15, -12.3312, 108.556, 0.427237, -0.580371, 51.0848, 6.62186, 98.4977, 36.2212, -5.29695, 12.8037, -13.906, -15.7722, -9.32755, 9.35511, 116.947, -63.8988, -10, 105.076, trial: 0, score: 3686 trial: 1, score: 4509 trial: 2, score: 3741 trial: 3, score: 5247 trial: 4, score: 3806 Policy 1: 31.8391, -46.4136, -3.8153, -4.39901, -55, 79.3182, 6.9818, 54.498, -49.5654, -9.16971, 80.5625, 22.6957, -10.0668, -85.3994, -4.01382, -12.1759, 18.1362, 4.8129, 31.959, -61.0091, 1.68675, 122, 18.1782, 0, -85.5379, -12.1367, -6.82792, -2.82571, 0.167852, -10.3439, -42.1106, -3.44522, 102.767, 11.7375, -35.9693, 61.6819, -1.31255, -18.5769, 70.9423, 2.01348, -13.3032, 61.3699, 1.37395, 122, 3.4954, -26.8834, 55.1983, -15, -12.3312, 108.556, 0.427237, -0.580371, 51.0848, 6.62186, 98.4977, 36.2212, -5.29695, 12.8037, -13.906, -15.7722, -9.32755, 9.35511, 116.947, -63.8988, -10, 105.076, Average Score: 4197.8 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 23.0167, -48.8311, -0.305041, -6.78185, -53.1225, 85.2429, 12.3611, 46.3091, -52.1696, -0.777312, 72.4614, 19.0566, -14.0945, -84.4144, 2.61153, -11.6244, 17.5033, -2.48518, 38.939, -59.1417, 5.90854, 116.733, 25.0861, -7.90499, -85.2694, -7.38025, -6.93638, -0.205162, 6.36092, -8.41614, -43.3417, -0.592517, 110.426, 18.4823, -34.4388, 56.7015, -5.37106, -18.0298, 67.3486, 0.0971978, -10.0876, 63.5011, 3.60367, 117.396, 0.0984304, -32.6265, 52.0766, -15, -19.5247, 114.476, 0.843332, -0.096337, 53.2501, 8.19474, 98.9057, 28.0455, -8.89375, 6.96047, -13.524, -16.8468, -10.8139, 12.0768, 114.99, -69.7858, -10, 102.548, trial: 0, score: 3519 trial: 1, score: 3805 trial: 2, score: 3549 trial: 3, score: 3709 trial: 4, score: 4061 Policy 1: 23.0167, -48.8311, -0.305041, -6.78185, -53.1225, 85.2429, 12.3611, 46.3091, -52.1696, -0.777312, 72.4614, 19.0566, -14.0945, -84.4144, 2.61153, -11.6244, 17.5033, -2.48518, 38.939, -59.1417, 5.90854, 116.733, 25.0861, -7.90499, -85.2694, -7.38025, -6.93638, -0.205162, 6.36092, -8.41614, -43.3417, -0.592517, 110.426, 18.4823, -34.4388, 56.7015, -5.37106, -18.0298, 67.3486, 0.0971978, -10.0876, 63.5011, 3.60367, 117.396, 0.0984304, -32.6265, 52.0766, -15, -19.5247, 114.476, 0.843332, -0.096337, 53.2501, 8.19474, 98.9057, 28.0455, -8.89375, 6.96047, -13.524, -16.8468, -10.8139, 12.0768, 114.99, -69.7858, -10, 102.548, Average Score: 3728.6 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 30.5884, -45.7936, -4.5451, -7.98027, -50.7294, 80.5148, 13.3898, 46.4325, -53.4756, -2.2631, 79.8983, 24.2702, -12.864, -88, -2.07008, -11.6165, 15.9718, 1.86615, 38.3256, -64.3533, 5.42594, 119.053, 19.1066, -5.76648, -88, -11.7767, -6.04758, -0.806288, 0.499921, -10.9709, -40.4163, -2.44725, 104.85, 16.854, -32.0607, 52.1904, -1.56077, -23.6578, 71.161, 4.17165, -10.1678, 56.752, 5.10834, 117.529, 2.29776, -33.985, 54.3339, -12.641, -9.57603, 111.755, -3.76104, -1.60534, 51.3051, 8.90489, 103.635, 29.0334, -9.35447, 10.6626, -15, -15.2895, -16.7353, 4.55341, 119.85, -66.715, -10, 103.238, trial: 0, score: 3869 trial: 1, score: 1943 trial: 2, score: 4767 trial: 3, score: 4317 trial: 4, score: 4543 Policy 1: 30.5884, -45.7936, -4.5451, -7.98027, -50.7294, 80.5148, 13.3898, 46.4325, -53.4756, -2.2631, 79.8983, 24.2702, -12.864, -88, -2.07008, -11.6165, 15.9718, 1.86615, 38.3256, -64.3533, 5.42594, 119.053, 19.1066, -5.76648, -88, -11.7767, -6.04758, -0.806288, 0.499921, -10.9709, -40.4163, -2.44725, 104.85, 16.854, -32.0607, 52.1904, -1.56077, -23.6578, 71.161, 4.17165, -10.1678, 56.752, 5.10834, 117.529, 2.29776, -33.985, 54.3339, -12.641, -9.57603, 111.755, -3.76104, -1.60534, 51.3051, 8.90489, 103.635, 29.0334, -9.35447, 10.6626, -15, -15.2895, -16.7353, 4.55341, 119.85, -66.715, -10, 103.238, Average Score: 3887.8 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 27.1957, -48.3555, -2.47521, -1.36125, -53.1141, 88.9306, 6.93228, 53.6275, -46.1139, -5.97286, 73.8963, 20.5826, -15.1551, -88, -0.261187, -9.88658, 24.0492, -0.809585, 29.9569, -64.2635, 8.60069, 122, 18.2335, -1.72853, -87.8266, -10.3831, -9.01485, -5.64024, -0.0832362, -8.48324, -36.6289, -5.09131, 109.512, 21.4231, -29.6427, 56.7689, -0.105127, -19.0034, 73.4467, -2.62447, -11.3008, 56.1471, 3.82585, 116.188, 0, -25.2645, 54.8763, -12.0853, -15.0349, 109.432, 3.27679, -5.20558, 54.8918, 3.52441, 99.2494, 29.9706, -7.75486, 5.02393, -15, -24.1692, -8.57161, 2.90242, 112.476, -69.3044, -5.7645, 105.099, trial: 0, score: 4318 trial: 1, score: 4150 trial: 2, score: 4638 trial: 3, score: 4540 trial: 4, score: 4797 Policy 1: 27.1957, -48.3555, -2.47521, -1.36125, -53.1141, 88.9306, 6.93228, 53.6275, -46.1139, -5.97286, 73.8963, 20.5826, -15.1551, -88, -0.261187, -9.88658, 24.0492, -0.809585, 29.9569, -64.2635, 8.60069, 122, 18.2335, -1.72853, -87.8266, -10.3831, -9.01485, -5.64024, -0.0832362, -8.48324, -36.6289, -5.09131, 109.512, 21.4231, -29.6427, 56.7689, -0.105127, -19.0034, 73.4467, -2.62447, -11.3008, 56.1471, 3.82585, 116.188, 0, -25.2645, 54.8763, -12.0853, -15.0349, 109.432, 3.27679, -5.20558, 54.8918, 3.52441, 99.2494, 29.9706, -7.75486, 5.02393, -15, -24.1692, -8.57161, 2.90242, 112.476, -69.3044, -5.7645, 105.099, Average Score: 4488.6 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 28.6894, -45.0214, 2.84404, -1.56237, -51.266, 80.0086, 8.09169, 50.298, -50.2391, -8.56682, 73.6269, 21.6316, -11.9822, -84.8139, 1.31796, -6.47577, 18.367, 5.05862, 37.1687, -60.7115, 7.62509, 119.913, 20.4832, 0, -88, -15, -10.4773, 0.132426, 0.466261, -9.20739, -38.8707, -4.53033, 105.157, 18.4229, -32.0767, 55.7459, -6.1943, -20.6768, 70.3323, 5.40563, -11.3289, 63.4476, 3.90445, 117.066, 0, -26.8184, 51.5366, -15, -10.9841, 107.794, -5.50185, 3.57707, 53.5542, 11.9576, 96.8459, 33.8946, -8.48765, 4.41251, -13.7614, -18.5773, -7.92851, 7.08527, 115.324, -68.5513, -10, 105.627, trial: 0, score: 3773 trial: 1, score: 4029 trial: 2, score: 3709 trial: 3, score: 4093 trial: 4, score: 2205 Policy 1: 28.6894, -45.0214, 2.84404, -1.56237, -51.266, 80.0086, 8.09169, 50.298, -50.2391, -8.56682, 73.6269, 21.6316, -11.9822, -84.8139, 1.31796, -6.47577, 18.367, 5.05862, 37.1687, -60.7115, 7.62509, 119.913, 20.4832, 0, -88, -15, -10.4773, 0.132426, 0.466261, -9.20739, -38.8707, -4.53033, 105.157, 18.4229, -32.0767, 55.7459, -6.1943, -20.6768, 70.3323, 5.40563, -11.3289, 63.4476, 3.90445, 117.066, 0, -26.8184, 51.5366, -15, -10.9841, 107.794, -5.50185, 3.57707, 53.5542, 11.9576, 96.8459, 33.8946, -8.48765, 4.41251, -13.7614, -18.5773, -7.92851, 7.08527, 115.324, -68.5513, -10, 105.627, Average Score: 3561.8 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 31.6082, -50.6543, -6.32756, -7.30972, -55, 80.4937, 6.2866, 50.4403, -47.2413, -7.02669, 79.9098, 18.3265, -17.8551, -84.9978, 3.37326, -11.3169, 22.6903, 2.19916, 31.0024, -62.7181, 7.09396, 117.413, 21.4367, -8.75666, -84.7377, -14.3655, -11.8942, 0.558966, 6.11832, -11.7967, -43.0005, -4.44828, 107.791, 18.7583, -35.175, 56.2176, -1.13547, -24.1734, 73.3002, 4.90751, -13.3634, 62.1293, 2.33444, 119.521, 0, -26.105, 52.0992, -9.334, -12.8492, 111.71, 3.1452, 0.968651, 51.589, 3.15296, 105.622, 31.1305, -12.7406, 5.06658, -14.6014, -15.8664, -8.61982, 11.6569, 113.658, -66.3034, -5.15335, 111.208, trial: 0, score: 3958 trial: 1, score: 3582 trial: 2, score: 3837 trial: 3, score: 4063 trial: 4, score: 3005 Policy 1: 31.6082, -50.6543, -6.32756, -7.30972, -55, 80.4937, 6.2866, 50.4403, -47.2413, -7.02669, 79.9098, 18.3265, -17.8551, -84.9978, 3.37326, -11.3169, 22.6903, 2.19916, 31.0024, -62.7181, 7.09396, 117.413, 21.4367, -8.75666, -84.7377, -14.3655, -11.8942, 0.558966, 6.11832, -11.7967, -43.0005, -4.44828, 107.791, 18.7583, -35.175, 56.2176, -1.13547, -24.1734, 73.3002, 4.90751, -13.3634, 62.1293, 2.33444, 119.521, 0, -26.105, 52.0992, -9.334, -12.8492, 111.71, 3.1452, 0.968651, 51.589, 3.15296, 105.622, 31.1305, -12.7406, 5.06658, -14.6014, -15.8664, -8.61982, 11.6569, 113.658, -66.3034, -5.15335, 111.208, Average Score: 3689 --------------------------------- New Iteration Current Best Policy: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Current Best Policy Score: 4776.8 Policy 0 will be: 27.1999, -48.9924, -1.92235, -5.89237, -53.7534, 83.9823, 8.79778, 50.5327, -50.0794, -5.33721, 76.3253, 22.3741, -14.3447, -87.7593, 0.0581858, -7.38627, 20.8727, 0.882268, 34.3741, -62.7465, 4.96564, 120.705, 20.2546, -4.68268, -87.6173, -11.657, -7.30941, -4.07498, 3.19226, -10.3837, -39.6735, -1.9058, 106.469, 16.4909, -33.6477, 56.8003, -3.09618, -21.3551, 70.7258, 1.28742, -13.0824, 60.1471, 4.61141, 120.39, 0, -29.0165, 53.306, -13.2037, -14.5411, 111.277, -0.5266, -0.771861, 50.113, 8.09193, 101.032, 32.4784, -8.95025, 9.1441, -15, -19.9496, -12.8543, 7.34267, 116.15, -68.5179, -10, 106.253, Policy 1 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, trial: 0, score: 5182 trial: 1, score: 4862 trial: 2, score: 5373 trial: 3, score: 5341 trial: 4, score: 4861 Policy 1: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Average Score: 5123.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 19.5144, -52.4061, -5.87756, -3.47173, -55, 87.0431, 10.4021, 56.306, -48.2152, -2.65425, 77.5737, 23.6458, -21.4836, -83.3381, -2.94116, -3, 19.8085, 6.68404, 32.4141, -63.6884, 3.91751, 116.636, 28.87, -1.88928, -86.6216, -8.70734, -8.59484, -6.29163, 3.84883, -17.046, -40.6862, -2.80719, 110.606, 18.2739, -40.0306, 55.7519, -1.81747, -28.1774, 67.8752, 1.06975, -13.9842, 58.7818, 1.6886, 117.801, 2.16579, -24.364, 58.2673, -11.4414, -10.3304, 112.839, -9.59531, 3.56875, 47.2903, 5.87134, 101.601, 25.6307, -8.6913, 3.65212, -15, -20.3447, -19.5692, 5.86191, 112.181, -70.0262, -10, 104.897, trial: 0, score: 2621 trial: 1, score: 1885 trial: 2, score: 4093 trial: 3, score: 4414 trial: 4, score: 3549 Policy 1: 19.5144, -52.4061, -5.87756, -3.47173, -55, 87.0431, 10.4021, 56.306, -48.2152, -2.65425, 77.5737, 23.6458, -21.4836, -83.3381, -2.94116, -3, 19.8085, 6.68404, 32.4141, -63.6884, 3.91751, 116.636, 28.87, -1.88928, -86.6216, -8.70734, -8.59484, -6.29163, 3.84883, -17.046, -40.6862, -2.80719, 110.606, 18.2739, -40.0306, 55.7519, -1.81747, -28.1774, 67.8752, 1.06975, -13.9842, 58.7818, 1.6886, 117.801, 2.16579, -24.364, 58.2673, -11.4414, -10.3304, 112.839, -9.59531, 3.56875, 47.2903, 5.87134, 101.601, 25.6307, -8.6913, 3.65212, -15, -20.3447, -19.5692, 5.86191, 112.181, -70.0262, -10, 104.897, Average Score: 3312.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.3803, -43.0911, -5.0885, -0.133462, -55, 83.0813, 12.4637, 55.7003, -52.4153, -2.78064, 73.1181, 25.9764, -22.9535, -84.826, -3.65024, -3, 16.8982, 6.92778, 29.2605, -65.1858, 11.1751, 120.522, 24.4238, -2.27991, -82.688, -9.94295, -10.1439, -3.52157, 5.93849, -12.7071, -37.4965, -0.470946, 102.542, 13.8263, -42.6787, 52.0778, -2.26208, -25.4698, 77.0498, 0.897313, -9.32296, 54.9101, -0.310676, 122, 0, -33.6184, 51.7593, -15, -13.119, 106.446, -9.74982, -1.88197, 50.1866, 5.24163, 95.4038, 29.1671, -12.8784, 9.5247, -15, -15.36, -21.7411, 3.29193, 113.058, -72.6751, -7.94925, 108.515, trial: 0, score: 2238 trial: 1, score: 3326 trial: 2, score: 3967 trial: 3, score: 4061 trial: 4, score: 3069 Policy 1: 23.3803, -43.0911, -5.0885, -0.133462, -55, 83.0813, 12.4637, 55.7003, -52.4153, -2.78064, 73.1181, 25.9764, -22.9535, -84.826, -3.65024, -3, 16.8982, 6.92778, 29.2605, -65.1858, 11.1751, 120.522, 24.4238, -2.27991, -82.688, -9.94295, -10.1439, -3.52157, 5.93849, -12.7071, -37.4965, -0.470946, 102.542, 13.8263, -42.6787, 52.0778, -2.26208, -25.4698, 77.0498, 0.897313, -9.32296, 54.9101, -0.310676, 122, 0, -33.6184, 51.7593, -15, -13.119, 106.446, -9.74982, -1.88197, 50.1866, 5.24163, 95.4038, 29.1671, -12.8784, 9.5247, -15, -15.36, -21.7411, 3.29193, 113.058, -72.6751, -7.94925, 108.515, Average Score: 3332.2 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 19.763, -44.3635, 0.755195, -0.5453, -49.4057, 83.6307, 6.35221, 59.2425, -51.226, -8.9109, 78.8781, 23.63, -19.6407, -86.7902, 0.486508, -3.83441, 18.0959, 1.93877, 34.334, -69.2879, 10.5889, 117.878, 28.4579, 0, -85.6046, -4.15068, -9.71547, -3.43688, 7.56721, -15.213, -42.2782, 5.13374, 102.761, 16.21, -36.6973, 50.8883, -0.477044, -25.2525, 68.5814, 7.5411, -12.449, 55.5874, 2.31848, 122, 5.90788, -26.1648, 52.2526, -12.2727, -18.7225, 110.428, -9.6331, 4.64745, 48.7032, 6.06367, 97.8057, 26.9401, -11.7109, 6.67522, -15, -10.7953, -19.7337, 6.64715, 108.305, -68.1669, -5.7476, 110.097, trial: 0, score: 4157 trial: 1, score: 3293 trial: 2, score: 1919 trial: 3, score: 2973 trial: 4, score: 2582 Policy 1: 19.763, -44.3635, 0.755195, -0.5453, -49.4057, 83.6307, 6.35221, 59.2425, -51.226, -8.9109, 78.8781, 23.63, -19.6407, -86.7902, 0.486508, -3.83441, 18.0959, 1.93877, 34.334, -69.2879, 10.5889, 117.878, 28.4579, 0, -85.6046, -4.15068, -9.71547, -3.43688, 7.56721, -15.213, -42.2782, 5.13374, 102.761, 16.21, -36.6973, 50.8883, -0.477044, -25.2525, 68.5814, 7.5411, -12.449, 55.5874, 2.31848, 122, 5.90788, -26.1648, 52.2526, -12.2727, -18.7225, 110.428, -9.6331, 4.64745, 48.7032, 6.06367, 97.8057, 26.9401, -11.7109, 6.67522, -15, -10.7953, -19.7337, 6.64715, 108.305, -68.1669, -5.7476, 110.097, Average Score: 2984.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.5405, -43.1315, 0.079176, -7.19783, -51.2556, 84.6008, 12.1553, 49.9424, -50.7449, -10, 78.4649, 30.2944, -22.455, -88, -4.37544, -3, 20.9229, 1.96629, 35.8258, -60.9765, 3.45731, 120.587, 21.7792, 0, -86.6209, -11.5685, -6.37002, -5.33901, 5.36541, -15.2681, -44.6106, 2.35435, 104.719, 12.8443, -41.7387, 58.6643, -1.45964, -25.535, 72.9887, 6.29395, -16.4173, 53.7558, -4.55598, 119.025, 6.31452, -25.0754, 53.0354, -13.57, -17.0732, 110.776, -4.18746, 0.951846, 44.323, 3.86657, 95.6948, 26.3086, -13.565, 5.51168, -15, -14.8052, -17.7518, 1.5463, 113.901, -67.6909, -10, 105.902, trial: 0, score: 1854 trial: 1, score: 3262 trial: 2, score: 2141 trial: 3, score: 3261 trial: 4, score: 2173 Policy 1: 23.5405, -43.1315, 0.079176, -7.19783, -51.2556, 84.6008, 12.1553, 49.9424, -50.7449, -10, 78.4649, 30.2944, -22.455, -88, -4.37544, -3, 20.9229, 1.96629, 35.8258, -60.9765, 3.45731, 120.587, 21.7792, 0, -86.6209, -11.5685, -6.37002, -5.33901, 5.36541, -15.2681, -44.6106, 2.35435, 104.719, 12.8443, -41.7387, 58.6643, -1.45964, -25.535, 72.9887, 6.29395, -16.4173, 53.7558, -4.55598, 119.025, 6.31452, -25.0754, 53.0354, -13.57, -17.0732, 110.776, -4.18746, 0.951846, 44.323, 3.86657, 95.6948, 26.3086, -13.565, 5.51168, -15, -14.8052, -17.7518, 1.5463, 113.901, -67.6909, -10, 105.902, Average Score: 2538.2 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.9837, -42.5069, -6.40884, -8.63429, -55, 83.1399, 7.19549, 53.9642, -49.8735, -3.99257, 72.3772, 27.0615, -20.7458, -88, -4.18521, -3, 21.0622, 7.15389, 29.1819, -63.4326, 3.59967, 120.187, 25.9433, 0, -88, -10.8469, -11.2534, -1.12774, 1.34907, -16.4708, -44.9937, 2.26513, 106.285, 11.2376, -38.65, 56.8877, 1.45971, -19.8012, 70.2094, 5.32844, -8.80982, 54.5972, 0.867592, 122, 3.86128, -33.733, 50.0214, -12.6675, -13.7846, 110.491, -8.15452, -4.17086, 46.9482, 8.06785, 97.1972, 27.53, -11.0399, 7.4696, -10.3318, -13.6672, -17.5672, 2.30599, 115.162, -67.0149, -10, 108.538, trial: 0, score: 3863 trial: 1, score: 2717 trial: 2, score: 2432 trial: 3, score: 2134 trial: 4, score: 2174 Policy 1: 20.9837, -42.5069, -6.40884, -8.63429, -55, 83.1399, 7.19549, 53.9642, -49.8735, -3.99257, 72.3772, 27.0615, -20.7458, -88, -4.18521, -3, 21.0622, 7.15389, 29.1819, -63.4326, 3.59967, 120.187, 25.9433, 0, -88, -10.8469, -11.2534, -1.12774, 1.34907, -16.4708, -44.9937, 2.26513, 106.285, 11.2376, -38.65, 56.8877, 1.45971, -19.8012, 70.2094, 5.32844, -8.80982, 54.5972, 0.867592, 122, 3.86128, -33.733, 50.0214, -12.6675, -13.7846, 110.491, -8.15452, -4.17086, 46.9482, 8.06785, 97.1972, 27.53, -11.0399, 7.4696, -10.3318, -13.6672, -17.5672, 2.30599, 115.162, -67.0149, -10, 108.538, Average Score: 2664 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.56, -49.4199, -3.55069, -0.62648, -51.6229, 81.9586, 14.5164, 55.8627, -50.8131, -2.65328, 72.459, 22.6058, -17.8128, -83.5417, -4.65536, -7.31244, 24.3368, 6.52646, 35.1596, -65.2246, 3.94122, 120.462, 28.0571, 0, -84.0414, -10.4762, -9.39177, -4.31813, 8.31317, -16.2851, -39.5378, 5.29583, 101.619, 11.9193, -39.0388, 54.0535, -0.0662199, -27.7621, 67.8144, 3.18853, -9.60147, 61.5157, 4.11508, 122, 1.90327, -31.9548, 49.518, -15, -11.9829, 112.876, -6.55988, -2.55197, 45.4318, 4.9579, 96.32, 26.5602, -9.42318, 5.12454, -14.2916, -18.954, -14.2124, 4.15771, 111.707, -66.9894, -5.20198, 107.649, trial: 0, score: 2207 trial: 1, score: 3638 trial: 2, score: 5149 trial: 3, score: 4893 trial: 4, score: 2461 Policy 1: 23.56, -49.4199, -3.55069, -0.62648, -51.6229, 81.9586, 14.5164, 55.8627, -50.8131, -2.65328, 72.459, 22.6058, -17.8128, -83.5417, -4.65536, -7.31244, 24.3368, 6.52646, 35.1596, -65.2246, 3.94122, 120.462, 28.0571, 0, -84.0414, -10.4762, -9.39177, -4.31813, 8.31317, -16.2851, -39.5378, 5.29583, 101.619, 11.9193, -39.0388, 54.0535, -0.0662199, -27.7621, 67.8144, 3.18853, -9.60147, 61.5157, 4.11508, 122, 1.90327, -31.9548, 49.518, -15, -11.9829, 112.876, -6.55988, -2.55197, 45.4318, 4.9579, 96.32, 26.5602, -9.42318, 5.12454, -14.2916, -18.954, -14.2124, 4.15771, 111.707, -66.9894, -5.20198, 107.649, Average Score: 3669.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.6811, -48.4459, -8.26701, -4.03902, -49.2262, 82.9168, 9.84831, 58.6983, -50.6183, -4.81251, 75.4501, 22.2698, -20.2622, -88, -4.73832, -6.56401, 17.2083, -0.386956, 34.9715, -68.72, 9.77141, 117.427, 22.1986, 0, -82.4916, -12.2929, -14.4083, -5.21887, 8.51099, -15.6367, -42.5668, 3.1782, 102.237, 11.5502, -37.8074, 58.7871, -8.3168, -23.9221, 72.7429, 1.88063, -9.86894, 58.8275, 3.35766, 122, 0, -31.9125, 54.6178, -15, -14.4273, 103.926, -2.45281, 4.59429, 43.3478, 0.146317, 96.9208, 33.7761, -9.45577, 10.4037, -15, -17.6067, -17.6217, 9.38469, 110.148, -69.9734, -7.50471, 113.03, trial: 0, score: 5341 trial: 1, score: 4573 trial: 2, score: 4221 trial: 3, score: 4541 trial: 4, score: 4637 Policy 1: 23.6811, -48.4459, -8.26701, -4.03902, -49.2262, 82.9168, 9.84831, 58.6983, -50.6183, -4.81251, 75.4501, 22.2698, -20.2622, -88, -4.73832, -6.56401, 17.2083, -0.386956, 34.9715, -68.72, 9.77141, 117.427, 22.1986, 0, -82.4916, -12.2929, -14.4083, -5.21887, 8.51099, -15.6367, -42.5668, 3.1782, 102.237, 11.5502, -37.8074, 58.7871, -8.3168, -23.9221, 72.7429, 1.88063, -9.86894, 58.8275, 3.35766, 122, 0, -31.9125, 54.6178, -15, -14.4273, 103.926, -2.45281, 4.59429, 43.3478, 0.146317, 96.9208, 33.7761, -9.45577, 10.4037, -15, -17.6067, -17.6217, 9.38469, 110.148, -69.9734, -7.50471, 113.03, Average Score: 4662.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 28.4743, -51.5543, -7.48845, -1.0724, -55, 85.8628, 14.5603, 56.6735, -48.5651, -3.21825, 77.0694, 25.0751, -20.5498, -88, -3.35606, -6.06047, 18.1007, 7.30276, 29.5703, -66.3548, 8.18429, 120.195, 28.2963, 0, -82.472, -6.08083, -13.1437, -2.93307, 7.75464, -11.3735, -37.3679, 0.480218, 109.226, 10.4932, -42.7467, 52.9677, -2.06453, -22.4894, 67.6763, 7.57328, -14.4576, 53.2661, -1.27872, 122, 5.84444, -33.7377, 55.1191, -15, -17.3405, 112.007, -5.87194, 2.09636, 52.3882, 1.23361, 101.177, 31.0534, -16.0424, 5.30608, -15, -14.4448, -17.8427, 6.2147, 109.217, -68.683, -10, 108.503, trial: 0, score: 4285 trial: 1, score: 3863 trial: 2, score: 3999 trial: 3, score: 1949 trial: 4, score: 2781 Policy 1: 28.4743, -51.5543, -7.48845, -1.0724, -55, 85.8628, 14.5603, 56.6735, -48.5651, -3.21825, 77.0694, 25.0751, -20.5498, -88, -3.35606, -6.06047, 18.1007, 7.30276, 29.5703, -66.3548, 8.18429, 120.195, 28.2963, 0, -82.472, -6.08083, -13.1437, -2.93307, 7.75464, -11.3735, -37.3679, 0.480218, 109.226, 10.4932, -42.7467, 52.9677, -2.06453, -22.4894, 67.6763, 7.57328, -14.4576, 53.2661, -1.27872, 122, 5.84444, -33.7377, 55.1191, -15, -17.3405, 112.007, -5.87194, 2.09636, 52.3882, 1.23361, 101.177, 31.0534, -16.0424, 5.30608, -15, -14.4448, -17.8427, 6.2147, 109.217, -68.683, -10, 108.503, Average Score: 3375.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.5873, -45.0282, -2.52221, -1.69966, -52.1729, 80.6318, 6.43186, 50.7388, -51.3013, -9.17141, 74.5545, 23.7479, -17.2612, -86.2012, -6.16015, -3, 20.0288, 6.45815, 30.6753, -66.7591, 4.47807, 115.812, 24.2341, 0, -88, -4.93892, -14.3203, 0.11572, 9.98938, -14.6004, -38.049, 6.29563, 110.512, 9.84619, -37.4434, 59.061, -0.229322, -27.1879, 76.4421, -0.257493, -8.34856, 57.8999, 0.486493, 122, 2.19014, -26.1774, 51.9072, -14.2125, -15.2174, 106.59, -3.09561, 4.24885, 45.4596, 4.92226, 95.4375, 33.9283, -15.2599, 4.91298, -15, -11.9823, -16.4229, 0.365478, 108.672, -66.0116, -9.54638, 111.7, trial: 0, score: 2687 trial: 1, score: 3709 trial: 2, score: 3837 trial: 3, score: 2998 trial: 4, score: 3421 Policy 1: 20.5873, -45.0282, -2.52221, -1.69966, -52.1729, 80.6318, 6.43186, 50.7388, -51.3013, -9.17141, 74.5545, 23.7479, -17.2612, -86.2012, -6.16015, -3, 20.0288, 6.45815, 30.6753, -66.7591, 4.47807, 115.812, 24.2341, 0, -88, -4.93892, -14.3203, 0.11572, 9.98938, -14.6004, -38.049, 6.29563, 110.512, 9.84619, -37.4434, 59.061, -0.229322, -27.1879, 76.4421, -0.257493, -8.34856, 57.8999, 0.486493, 122, 2.19014, -26.1774, 51.9072, -14.2125, -15.2174, 106.59, -3.09561, 4.24885, 45.4596, 4.92226, 95.4375, 33.9283, -15.2599, 4.91298, -15, -11.9823, -16.4229, 0.365478, 108.672, -66.0116, -9.54638, 111.7, Average Score: 3330.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.4918, -43.7893, -4.43001, -3.21698, -49.165, 86.944, 7.21094, 52.3544, -49.617, -5.34194, 81.8462, 27.8028, -23.4762, -88, 0.902256, -3, 25.2673, 7.29507, 35.2373, -60.3086, 4.05391, 114.678, 26.3754, 0, -88, -5.20389, -6.15908, 1.24273, 8.0158, -12.5549, -36.181, -0.766757, 104.415, 9.93628, -36.1621, 58.5671, -1.4309, -24.6494, 67.3507, 8.15245, -10.4, 55.8851, -0.152277, 121.603, 0.4379, -32.0153, 58.3876, -15, -11.1255, 112.484, -1.10213, -4.95581, 49.6751, 2.53364, 96.8167, 28.2929, -10.3863, 2.26605, -15, -15.7839, -21.7702, 8.51035, 115.928, -70.4725, -10, 113.352, trial: 0, score: 2119 trial: 1, score: 3584 trial: 2, score: 2935 trial: 3, score: 3421 trial: 4, score: 2173 Policy 1: 27.4918, -43.7893, -4.43001, -3.21698, -49.165, 86.944, 7.21094, 52.3544, -49.617, -5.34194, 81.8462, 27.8028, -23.4762, -88, 0.902256, -3, 25.2673, 7.29507, 35.2373, -60.3086, 4.05391, 114.678, 26.3754, 0, -88, -5.20389, -6.15908, 1.24273, 8.0158, -12.5549, -36.181, -0.766757, 104.415, 9.93628, -36.1621, 58.5671, -1.4309, -24.6494, 67.3507, 8.15245, -10.4, 55.8851, -0.152277, 121.603, 0.4379, -32.0153, 58.3876, -15, -11.1255, 112.484, -1.10213, -4.95581, 49.6751, 2.53364, 96.8167, 28.2929, -10.3863, 2.26605, -15, -15.7839, -21.7702, 8.51035, 115.928, -70.4725, -10, 113.352, Average Score: 2846.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 24.2407, -50.7153, -6.43299, -8.69156, -55, 85.1049, 9.0726, 52.4302, -49.4927, -3.49025, 80.3826, 29.2677, -16.7208, -83.1291, -1.15213, -3, 20.9059, 7.3392, 34.789, -64.2469, 5.36815, 119.969, 23.0762, 0, -85.8378, -4.03913, -11.6396, -5.26362, 2.75993, -11.3184, -45.4804, -3.53736, 110.26, 17.7925, -39.0673, 58.9878, -1.57874, -22.1523, 71.5299, -1.19472, -13.1261, 56.986, -4.16354, 119.543, 0, -25.1784, 51.1565, -12.2271, -17.1164, 104.2, -3.51547, -3.67285, 44.9419, 7.91492, 95.3705, 30.2561, -7.44558, 4.99441, -12.2755, -11.5999, -15.8196, 0.245812, 112.511, -67.2055, -10, 111.705, trial: 0, score: 3830 trial: 1, score: 2038 trial: 2, score: 4637 trial: 3, score: 3197 trial: 4, score: 3415 Policy 1: 24.2407, -50.7153, -6.43299, -8.69156, -55, 85.1049, 9.0726, 52.4302, -49.4927, -3.49025, 80.3826, 29.2677, -16.7208, -83.1291, -1.15213, -3, 20.9059, 7.3392, 34.789, -64.2469, 5.36815, 119.969, 23.0762, 0, -85.8378, -4.03913, -11.6396, -5.26362, 2.75993, -11.3184, -45.4804, -3.53736, 110.26, 17.7925, -39.0673, 58.9878, -1.57874, -22.1523, 71.5299, -1.19472, -13.1261, 56.986, -4.16354, 119.543, 0, -25.1784, 51.1565, -12.2271, -17.1164, 104.2, -3.51547, -3.67285, 44.9419, 7.91492, 95.3705, 30.2561, -7.44558, 4.99441, -12.2755, -11.5999, -15.8196, 0.245812, 112.511, -67.2055, -10, 111.705, Average Score: 3423.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 25.7611, -42.7591, -2.51065, -0.108736, -54.8817, 79.0764, 6.13951, 53.3999, -51.9878, -2.68226, 77.9525, 25.8371, -22.6104, -88, -1.02541, -3, 16.1366, 2.29754, 30.2237, -64.7162, 3.21164, 122, 28.2291, 0, -88, -6.2537, -12.6422, 1.20251, 1.46071, -7.96705, -44.0675, 2.06047, 103.231, 12.4809, -33.5221, 55.3182, 0.800225, -28.4675, 69.9204, 7.88344, -7.86636, 53.3426, 1.88934, 122, 0, -24.9849, 53.2453, -15, -10.7867, 105.307, -6.34439, 0.85896, 47.4413, 5.13095, 97.6064, 27.9142, -10.2196, 3.8347, -10.0625, -16.9374, -20.8835, 5.53873, 115.634, -71.2721, -10, 110.002, trial: 0, score: 4445 trial: 1, score: 4829 trial: 2, score: 4351 trial: 3, score: 2269 trial: 4, score: 3325 Policy 1: 25.7611, -42.7591, -2.51065, -0.108736, -54.8817, 79.0764, 6.13951, 53.3999, -51.9878, -2.68226, 77.9525, 25.8371, -22.6104, -88, -1.02541, -3, 16.1366, 2.29754, 30.2237, -64.7162, 3.21164, 122, 28.2291, 0, -88, -6.2537, -12.6422, 1.20251, 1.46071, -7.96705, -44.0675, 2.06047, 103.231, 12.4809, -33.5221, 55.3182, 0.800225, -28.4675, 69.9204, 7.88344, -7.86636, 53.3426, 1.88934, 122, 0, -24.9849, 53.2453, -15, -10.7867, 105.307, -6.34439, 0.85896, 47.4413, 5.13095, 97.6064, 27.9142, -10.2196, 3.8347, -10.0625, -16.9374, -20.8835, 5.53873, 115.634, -71.2721, -10, 110.002, Average Score: 3843.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 25.3036, -50.4867, -5.51873, -5.91647, -54.6314, 82.1135, 7.3707, 58.6013, -44.1423, -6.85121, 75.1335, 25.962, -14.4387, -88, 3.236, -3, 18.0344, 3.47544, 34.8784, -62.8637, 3.86076, 122, 23.2966, -4.2746, -87.7806, -6.56956, -13.6021, -0.792576, 7.07993, -14.2382, -40.7287, 5.11464, 101.405, 16.0443, -37.2858, 57.4524, -6.18469, -23.0871, 69.9507, 4.57277, -11.4451, 59.1004, 4.30686, 122, 3.39853, -25.2017, 55.5608, -14.1525, -14.0299, 112.001, -9.07039, 0.4369, 52.3243, 1.81449, 96.0699, 33.6649, -7.50545, 10.4413, -15, -15.5094, -13.9759, 9.23681, 110.861, -63.5575, -6.11199, 107.34, trial: 0, score: 2365 trial: 1, score: 2206 trial: 2, score: 2365 trial: 3, score: 3357 trial: 4, score: 3997 Policy 1: 25.3036, -50.4867, -5.51873, -5.91647, -54.6314, 82.1135, 7.3707, 58.6013, -44.1423, -6.85121, 75.1335, 25.962, -14.4387, -88, 3.236, -3, 18.0344, 3.47544, 34.8784, -62.8637, 3.86076, 122, 23.2966, -4.2746, -87.7806, -6.56956, -13.6021, -0.792576, 7.07993, -14.2382, -40.7287, 5.11464, 101.405, 16.0443, -37.2858, 57.4524, -6.18469, -23.0871, 69.9507, 4.57277, -11.4451, 59.1004, 4.30686, 122, 3.39853, -25.2017, 55.5608, -14.1525, -14.0299, 112.001, -9.07039, 0.4369, 52.3243, 1.81449, 96.0699, 33.6649, -7.50545, 10.4413, -15, -15.5094, -13.9759, 9.23681, 110.861, -63.5575, -6.11199, 107.34, Average Score: 2858 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 26.0356, -48.3227, -6.15166, -3.88042, -55, 83.5504, 8.82505, 55.7906, -47.6826, -4.66052, 79.801, 26.1744, -18.2772, -88, 3.40012, -7.53143, 21.8834, 2.51116, 35.6281, -60.7565, 7.2358, 114.038, 23.8439, 0, -84.8907, -4.26131, -12.3099, 0.943283, 4.40219, -17.2609, -39.6992, -2.30553, 108.691, 15.6249, -35.416, 57.7053, -6.93966, -21.4028, 77.1416, 7.42564, -11.791, 57.1803, 2.4279, 122, 3.74741, -24.6236, 58.0514, -15, -12.9985, 108.532, -1.45913, -0.326709, 49.8627, -0.412313, 102.275, 33.2532, -9.08398, 10.2503, -12.0988, -17.5171, -19.515, 3.79898, 107.867, -70.3634, -9.74109, 106.259, trial: 0, score: 4125 trial: 1, score: 4669 trial: 2, score: 2815 trial: 3, score: 3709 trial: 4, score: 3677 Policy 1: 26.0356, -48.3227, -6.15166, -3.88042, -55, 83.5504, 8.82505, 55.7906, -47.6826, -4.66052, 79.801, 26.1744, -18.2772, -88, 3.40012, -7.53143, 21.8834, 2.51116, 35.6281, -60.7565, 7.2358, 114.038, 23.8439, 0, -84.8907, -4.26131, -12.3099, 0.943283, 4.40219, -17.2609, -39.6992, -2.30553, 108.691, 15.6249, -35.416, 57.7053, -6.93966, -21.4028, 77.1416, 7.42564, -11.791, 57.1803, 2.4279, 122, 3.74741, -24.6236, 58.0514, -15, -12.9985, 108.532, -1.45913, -0.326709, 49.8627, -0.412313, 102.275, 33.2532, -9.08398, 10.2503, -12.0988, -17.5171, -19.515, 3.79898, 107.867, -70.3634, -9.74109, 106.259, Average Score: 3799 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 24.8359, -48.306, -0.382459, -3.03135, -50.6078, 86.8629, 5.53967, 55.4763, -50.7287, -7.51584, 75.1339, 21.3623, -21.916, -84.8249, 3.15098, -3, 20.2479, 2.00193, 30.7209, -63.5114, 6.2337, 118.806, 19.6048, 0, -88, -5.554, -11.0875, -4.7656, 2.39017, -16.4011, -43.566, 0.570778, 108.098, 17.1601, -35.6007, 54.4553, -6.77216, -18.8355, 72.0127, -0.896931, -9.84999, 59.4377, -0.28855, 122, 3.62391, -33.4399, 58.432, -14.9505, -17.5397, 105.48, -6.90114, -1.23201, 46.4213, 5.30373, 95.5951, 33.3981, -8.51535, 4.33606, -15, -20.6488, -21.1916, 7.75686, 108.453, -71.5015, -10, 113.306, trial: 0, score: 1981 trial: 1, score: 3647 trial: 2, score: 3613 trial: 3, score: 2878 trial: 4, score: 4278 Policy 1: 24.8359, -48.306, -0.382459, -3.03135, -50.6078, 86.8629, 5.53967, 55.4763, -50.7287, -7.51584, 75.1339, 21.3623, -21.916, -84.8249, 3.15098, -3, 20.2479, 2.00193, 30.7209, -63.5114, 6.2337, 118.806, 19.6048, 0, -88, -5.554, -11.0875, -4.7656, 2.39017, -16.4011, -43.566, 0.570778, 108.098, 17.1601, -35.6007, 54.4553, -6.77216, -18.8355, 72.0127, -0.896931, -9.84999, 59.4377, -0.28855, 122, 3.62391, -33.4399, 58.432, -14.9505, -17.5397, 105.48, -6.90114, -1.23201, 46.4213, 5.30373, 95.5951, 33.3981, -8.51535, 4.33606, -15, -20.6488, -21.1916, 7.75686, 108.453, -71.5015, -10, 113.306, Average Score: 3279.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 21.364, -51.6052, 0.241337, -1.20803, -51.1468, 82.0513, 14.6069, 49.841, -52.4887, -10, 77.4869, 23.0153, -20.8573, -86.028, -4.9436, -6.04238, 23.3511, 6.14934, 35.3871, -66.5303, 6.54246, 119.545, 25.0766, 0, -85.2071, -12.4009, -11.5888, -2.91807, 4.76371, -10.2979, -36.3778, 3.15271, 104.438, 14.9183, -39.62, 52.1301, -2.58816, -22.7602, 73.2565, 3.86983, -16.7738, 58.6381, -1.99028, 122, 0.179412, -27.7376, 51.2661, -11.8382, -14.0645, 110.218, -4.96532, -2.54403, 52.4428, 8.77609, 98.9092, 30.2228, -10.9713, 9.19447, -15, -18.3343, -12.8665, 6.3451, 110.042, -72.2133, -10, 104.675, trial: 0, score: 2556 trial: 1, score: 3839 trial: 2, score: 4959 trial: 3, score: 5469 trial: 4, score: 3902 Policy 1: 21.364, -51.6052, 0.241337, -1.20803, -51.1468, 82.0513, 14.6069, 49.841, -52.4887, -10, 77.4869, 23.0153, -20.8573, -86.028, -4.9436, -6.04238, 23.3511, 6.14934, 35.3871, -66.5303, 6.54246, 119.545, 25.0766, 0, -85.2071, -12.4009, -11.5888, -2.91807, 4.76371, -10.2979, -36.3778, 3.15271, 104.438, 14.9183, -39.62, 52.1301, -2.58816, -22.7602, 73.2565, 3.86983, -16.7738, 58.6381, -1.99028, 122, 0.179412, -27.7376, 51.2661, -11.8382, -14.0645, 110.218, -4.96532, -2.54403, 52.4428, 8.77609, 98.9092, 30.2228, -10.9713, 9.19447, -15, -18.3343, -12.8665, 6.3451, 110.042, -72.2133, -10, 104.675, Average Score: 4145 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.3482, -50.0657, -1.4624, -7.57766, -55, 86.9752, 7.35057, 54.5792, -43.9208, -9.51687, 77.7264, 28.743, -20.6572, -88, 3.18415, -5.89749, 19.9393, 3.0289, 32.6145, -68.2732, 9.0816, 113.523, 27.6946, 0, -84.464, -9.14726, -9.54942, -1.46986, 3.03226, -17.3888, -44.6767, 1.36275, 101.197, 10.1552, -33.3783, 59.0738, -8.33246, -21.0073, 74.6388, 0.354523, -15.0871, 60.8062, 3.55719, 122, 0, -33.4128, 58.3866, -11.5587, -18.1574, 108.154, -0.331224, -4.50761, 48.8037, -0.46357, 94.0043, 28.0368, -12.868, 9.4979, -15, -13.0404, -17.7832, 2.80134, 107.796, -63.4102, -6.43205, 107.53, trial: 0, score: 4798 trial: 1, score: 4541 trial: 2, score: 4477 trial: 3, score: 5117 trial: 4, score: 5375 Policy 1: 23.3482, -50.0657, -1.4624, -7.57766, -55, 86.9752, 7.35057, 54.5792, -43.9208, -9.51687, 77.7264, 28.743, -20.6572, -88, 3.18415, -5.89749, 19.9393, 3.0289, 32.6145, -68.2732, 9.0816, 113.523, 27.6946, 0, -84.464, -9.14726, -9.54942, -1.46986, 3.03226, -17.3888, -44.6767, 1.36275, 101.197, 10.1552, -33.3783, 59.0738, -8.33246, -21.0073, 74.6388, 0.354523, -15.0871, 60.8062, 3.55719, 122, 0, -33.4128, 58.3866, -11.5587, -18.1574, 108.154, -0.331224, -4.50761, 48.8037, -0.46357, 94.0043, 28.0368, -12.868, 9.4979, -15, -13.0404, -17.7832, 2.80134, 107.796, -63.4102, -6.43205, 107.53, Average Score: 4861.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.5532, -49.0037, -1.76202, -4.21113, -55, 79.4834, 14.9139, 50.3481, -50.7127, -2.51901, 79.5263, 24.0354, -14.3851, -88, 0.675424, -3, 18.0488, 7.43721, 35.4951, -62.9553, 3.25983, 117.943, 26.5458, 0, -88, -12.4726, -7.62215, -0.979067, 2.85715, -11.346, -45.4702, -2.87532, 102.657, 17.3456, -37.5194, 55.9463, -7.72226, -21.1535, 70.5141, 1.43562, -13.1211, 56.7536, 2.42075, 122, 0, -29.1438, 59.2718, -15, -13.8103, 112.314, -4.16647, -4.58171, 45.4671, 7.43883, 97.9446, 26.3488, -15.1967, 4.09371, -14.7086, -15.3455, -19.7281, 5.88602, 113.046, -71.0301, -9.38643, 108.057, trial: 0, score: 4381 trial: 1, score: 2173 trial: 2, score: 4701 trial: 3, score: 3709 trial: 4, score: 4095 Policy 1: 27.5532, -49.0037, -1.76202, -4.21113, -55, 79.4834, 14.9139, 50.3481, -50.7127, -2.51901, 79.5263, 24.0354, -14.3851, -88, 0.675424, -3, 18.0488, 7.43721, 35.4951, -62.9553, 3.25983, 117.943, 26.5458, 0, -88, -12.4726, -7.62215, -0.979067, 2.85715, -11.346, -45.4702, -2.87532, 102.657, 17.3456, -37.5194, 55.9463, -7.72226, -21.1535, 70.5141, 1.43562, -13.1211, 56.7536, 2.42075, 122, 0, -29.1438, 59.2718, -15, -13.8103, 112.314, -4.16647, -4.58171, 45.4671, 7.43883, 97.9446, 26.3488, -15.1967, 4.09371, -14.7086, -15.3455, -19.7281, 5.88602, 113.046, -71.0301, -9.38643, 108.057, Average Score: 3811.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.5925, -47.4467, -7.02018, -7.97671, -52.0393, 86.7419, 7.57058, 52.6191, -53.3495, -7.54359, 81.4087, 29.4306, -16.0696, -83.0075, 2.01825, -4.89651, 17.1231, 7.58726, 27.6917, -65.6952, 2.76152, 117.589, 25.5578, -2.95919, -87.4013, -8.36702, -9.40169, -0.346534, 9.38868, -12.2405, -45.0536, -2.60946, 101.015, 9.93583, -36.2633, 56.1869, -3.39637, -24.8764, 69.6375, 1.52423, -10.9633, 59.3863, -0.259815, 122, 1.28011, -29.2874, 51.9066, -12.1322, -19.1168, 107.851, -5.02517, 1.40868, 48.2569, 7.60354, 94.7781, 29.3376, -17.3651, 9.54015, -15, -18.4684, -21.7871, 6.9916, 115.282, -64.0677, -10, 111.662, trial: 0, score: 2013 trial: 1, score: 2207 trial: 2, score: 3518 trial: 3, score: 2077 trial: 4, score: 2143 Policy 1: 20.5925, -47.4467, -7.02018, -7.97671, -52.0393, 86.7419, 7.57058, 52.6191, -53.3495, -7.54359, 81.4087, 29.4306, -16.0696, -83.0075, 2.01825, -4.89651, 17.1231, 7.58726, 27.6917, -65.6952, 2.76152, 117.589, 25.5578, -2.95919, -87.4013, -8.36702, -9.40169, -0.346534, 9.38868, -12.2405, -45.0536, -2.60946, 101.015, 9.93583, -36.2633, 56.1869, -3.39637, -24.8764, 69.6375, 1.52423, -10.9633, 59.3863, -0.259815, 122, 1.28011, -29.2874, 51.9066, -12.1322, -19.1168, 107.851, -5.02517, 1.40868, 48.2569, 7.60354, 94.7781, 29.3376, -17.3651, 9.54015, -15, -18.4684, -21.7871, 6.9916, 115.282, -64.0677, -10, 111.662, Average Score: 2391.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.8033, -48.6313, -5.96453, -1.99889, -52.8142, 79.7025, 15.1303, 50.2704, -49.6053, -5.49301, 81.9304, 26.3867, -20.2742, -85.3487, -1.26814, -3, 25.1138, 3.55105, 34.6038, -59.9574, 11.8515, 115.971, 21.6243, -1.89913, -85.0454, -10.2699, -10.6345, -0.0857934, 4.0965, -10.9609, -36.5797, -1.19057, 100.888, 13.2, -39.539, 55.8475, -7.86494, -19.0827, 68.6715, 6.17929, -10.4252, 58.861, -0.5483, 122, 2.45024, -34.206, 54.2118, -12.2581, -13.0157, 107.762, -3.08472, 0.919176, 50.1889, 2.93085, 100.77, 30.3426, -10.8206, 3.92717, -11.4388, -15.7709, -16.2423, 5.31952, 107.579, -70.498, -10, 109.985, trial: 0, score: 2463 trial: 1, score: 3678 trial: 2, score: 3382 trial: 3, score: 3069 trial: 4, score: 2845 Policy 1: 20.8033, -48.6313, -5.96453, -1.99889, -52.8142, 79.7025, 15.1303, 50.2704, -49.6053, -5.49301, 81.9304, 26.3867, -20.2742, -85.3487, -1.26814, -3, 25.1138, 3.55105, 34.6038, -59.9574, 11.8515, 115.971, 21.6243, -1.89913, -85.0454, -10.2699, -10.6345, -0.0857934, 4.0965, -10.9609, -36.5797, -1.19057, 100.888, 13.2, -39.539, 55.8475, -7.86494, -19.0827, 68.6715, 6.17929, -10.4252, 58.861, -0.5483, 122, 2.45024, -34.206, 54.2118, -12.2581, -13.0157, 107.762, -3.08472, 0.919176, 50.1889, 2.93085, 100.77, 30.3426, -10.8206, 3.92717, -11.4388, -15.7709, -16.2423, 5.31952, 107.579, -70.498, -10, 109.985, Average Score: 3087.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.3356, -44.3922, -3.35282, -6.30265, -55, 79.0422, 10.5107, 55.4206, -53.3695, -4.90547, 76.5272, 22.4285, -19.3987, -85.9309, 0.138598, -6.06923, 23.0413, 0.464787, 32.1597, -67.9648, 11.8627, 116.772, 28.4685, -0.985982, -88, -7.78028, -7.76099, -2.73074, 2.24714, -8.26234, -42.6812, 3.59237, 106.338, 15.1699, -37.7586, 58.2201, -7.38776, -19.8606, 68.7901, -0.561249, -10.8079, 56.5316, 0.243697, 122, 1.39223, -25.9008, 50.9747, -15, -18.0146, 110.854, -4.88127, -3.98029, 51.1661, 4.27816, 99.8849, 27.1257, -16.9614, 3.22897, -12.1794, -12.1792, -18.7993, 2.26152, 110.223, -68.2411, -8.73939, 112.57, trial: 0, score: 3198 trial: 1, score: 4534 trial: 2, score: 4829 trial: 3, score: 5149 trial: 4, score: 5245 Policy 1: 27.3356, -44.3922, -3.35282, -6.30265, -55, 79.0422, 10.5107, 55.4206, -53.3695, -4.90547, 76.5272, 22.4285, -19.3987, -85.9309, 0.138598, -6.06923, 23.0413, 0.464787, 32.1597, -67.9648, 11.8627, 116.772, 28.4685, -0.985982, -88, -7.78028, -7.76099, -2.73074, 2.24714, -8.26234, -42.6812, 3.59237, 106.338, 15.1699, -37.7586, 58.2201, -7.38776, -19.8606, 68.7901, -0.561249, -10.8079, 56.5316, 0.243697, 122, 1.39223, -25.9008, 50.9747, -15, -18.0146, 110.854, -4.88127, -3.98029, 51.1661, 4.27816, 99.8849, 27.1257, -16.9614, 3.22897, -12.1794, -12.1792, -18.7993, 2.26152, 110.223, -68.2411, -8.73939, 112.57, Average Score: 4591 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 25.2969, -45.8745, -6.16344, -4.5257, -55, 80.9696, 7.12347, 52.7322, -50.7191, -6.97506, 81.7827, 30.9177, -15.2221, -88, -2.97685, -3, 20.3044, 2.9255, 35.7239, -67.869, 7.61175, 113.961, 21.8805, 0, -84.9096, -8.90182, -12.654, -6.55091, 3.99062, -17.0692, -41.3878, -1.59073, 107.885, 16.0217, -35.6827, 56.5653, -0.688952, -24.8384, 72.6469, 0.45716, -9.00559, 53.9501, -4.08637, 122, 1.51519, -32.6667, 51.9403, -10.8591, -18.3826, 108.389, -7.27256, 0.250112, 45.2471, 9.17115, 94.1017, 29.0285, -9.84254, 8.51377, -15, -14.7815, -18.5889, 5.5314, 109.119, -64.9165, -5.99698, 112.91, trial: 0, score: 4156 trial: 1, score: 4278 trial: 2, score: 3997 trial: 3, score: 4991 trial: 4, score: 4221 Policy 1: 25.2969, -45.8745, -6.16344, -4.5257, -55, 80.9696, 7.12347, 52.7322, -50.7191, -6.97506, 81.7827, 30.9177, -15.2221, -88, -2.97685, -3, 20.3044, 2.9255, 35.7239, -67.869, 7.61175, 113.961, 21.8805, 0, -84.9096, -8.90182, -12.654, -6.55091, 3.99062, -17.0692, -41.3878, -1.59073, 107.885, 16.0217, -35.6827, 56.5653, -0.688952, -24.8384, 72.6469, 0.45716, -9.00559, 53.9501, -4.08637, 122, 1.51519, -32.6667, 51.9403, -10.8591, -18.3826, 108.389, -7.27256, 0.250112, 45.2471, 9.17115, 94.1017, 29.0285, -9.84254, 8.51377, -15, -14.7815, -18.5889, 5.5314, 109.119, -64.9165, -5.99698, 112.91, Average Score: 4328.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 24.5011, -43.7957, -6.6625, -1.35826, -52.9048, 83.4555, 5.74042, 58.7126, -44.821, -3.88558, 76.3964, 28.8215, -23.3955, -88, -3.3801, -3.71952, 16.415, 0.480182, 34.0493, -64.5774, 7.33542, 117.882, 27.8787, -1.03766, -82.5955, -12.5422, -13.4074, -1.64509, 2.75855, -10.6876, -37.7209, 4.59142, 110.515, 17.9927, -42.6641, 56.4208, -3.05257, -26.8351, 69.451, 6.84293, -13.7949, 58.5231, 1.65768, 122, 0, -27.7242, 57.2707, -11.9062, -17.2762, 106.184, -6.07612, -0.742136, 45.2868, 0.381095, 99.2063, 26.8502, -10.2282, 8.85646, -11.2868, -11.1577, -18.3055, 8.04149, 113.115, -64.5744, -10, 113.517, trial: 0, score: 3229 trial: 1, score: 2397 trial: 2, score: 2493 trial: 3, score: 2013 trial: 4, score: 3197 Policy 1: 24.5011, -43.7957, -6.6625, -1.35826, -52.9048, 83.4555, 5.74042, 58.7126, -44.821, -3.88558, 76.3964, 28.8215, -23.3955, -88, -3.3801, -3.71952, 16.415, 0.480182, 34.0493, -64.5774, 7.33542, 117.882, 27.8787, -1.03766, -82.5955, -12.5422, -13.4074, -1.64509, 2.75855, -10.6876, -37.7209, 4.59142, 110.515, 17.9927, -42.6641, 56.4208, -3.05257, -26.8351, 69.451, 6.84293, -13.7949, 58.5231, 1.65768, 122, 0, -27.7242, 57.2707, -11.9062, -17.2762, 106.184, -6.07612, -0.742136, 45.2868, 0.381095, 99.2063, 26.8502, -10.2282, 8.85646, -11.2868, -11.1577, -18.3055, 8.04149, 113.115, -64.5744, -10, 113.517, Average Score: 2665.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 28.3059, -50.8355, 1.22732, -2.10766, -55, 78.5384, 9.45365, 51.8964, -44.1618, -8.4454, 75.5363, 21.5219, -22.2129, -88, 3.16158, -4.7019, 18.7849, 8.55375, 27.0949, -68.8667, 5.05675, 113.992, 27.1421, 0, -87.2592, -5.50542, -12.6587, -3.35347, 4.60633, -8.90007, -36.7599, 1.83159, 104.872, 17.7053, -34.5201, 56.4579, -5.46521, -21.5862, 67.3576, 7.18446, -10.0424, 55.5491, -2.69117, 122, 4.15128, -32.551, 59.1805, -15, -10.6232, 108.989, -3.04161, -0.951001, 43.4874, 7.13633, 103.486, 32.8335, -7.61131, 3.64601, -15, -17.1252, -20.0525, 6.07478, 108.459, -68.6724, -10, 112.629, trial: 0, score: 2397 trial: 1, score: 3581 trial: 2, score: 2294 trial: 3, score: 2206 trial: 4, score: 3670 Policy 1: 28.3059, -50.8355, 1.22732, -2.10766, -55, 78.5384, 9.45365, 51.8964, -44.1618, -8.4454, 75.5363, 21.5219, -22.2129, -88, 3.16158, -4.7019, 18.7849, 8.55375, 27.0949, -68.8667, 5.05675, 113.992, 27.1421, 0, -87.2592, -5.50542, -12.6587, -3.35347, 4.60633, -8.90007, -36.7599, 1.83159, 104.872, 17.7053, -34.5201, 56.4579, -5.46521, -21.5862, 67.3576, 7.18446, -10.0424, 55.5491, -2.69117, 122, 4.15128, -32.551, 59.1805, -15, -10.6232, 108.989, -3.04161, -0.951001, 43.4874, 7.13633, 103.486, 32.8335, -7.61131, 3.64601, -15, -17.1252, -20.0525, 6.07478, 108.459, -68.6724, -10, 112.629, Average Score: 2829.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.2156, -44.59, -4.48787, -6.04866, -51.6857, 82.9924, 5.81922, 54.152, -53.2305, -6.20898, 75.6066, 21.6313, -18.706, -87.6191, 3.18737, -3, 16.4175, 0.83611, 31.616, -66.7503, 6.65695, 120.541, 20.9891, 0, -88, -12.6564, -11.1574, -5.63136, 1.31533, -11.5991, -42.2904, -3.57399, 101.721, 11.9132, -37.7579, 57.0005, -5.2389, -26.5564, 70.0665, 5.64833, -11.0128, 53.3125, -1.16291, 122, 0, -28.3595, 55.7717, -15, -14.661, 112.718, -1.97073, 0.645145, 45.6067, 1.55431, 103.432, 26.4518, -11.2227, 3.11035, -11.6916, -16.7509, -15.7117, 5.92537, 116.424, -70.1191, -10, 108.236, trial: 0, score: 3006 trial: 1, score: 5398 trial: 2, score: 5079 trial: 3, score: 3359 trial: 4, score: 2301 Policy 1: 27.2156, -44.59, -4.48787, -6.04866, -51.6857, 82.9924, 5.81922, 54.152, -53.2305, -6.20898, 75.6066, 21.6313, -18.706, -87.6191, 3.18737, -3, 16.4175, 0.83611, 31.616, -66.7503, 6.65695, 120.541, 20.9891, 0, -88, -12.6564, -11.1574, -5.63136, 1.31533, -11.5991, -42.2904, -3.57399, 101.721, 11.9132, -37.7579, 57.0005, -5.2389, -26.5564, 70.0665, 5.64833, -11.0128, 53.3125, -1.16291, 122, 0, -28.3595, 55.7717, -15, -14.661, 112.718, -1.97073, 0.645145, 45.6067, 1.55431, 103.432, 26.4518, -11.2227, 3.11035, -11.6916, -16.7509, -15.7117, 5.92537, 116.424, -70.1191, -10, 108.236, Average Score: 3828.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 26.8734, -50.5877, -5.64763, -1.33319, -55, 84.78, 6.35516, 55.8846, -49.8709, -3.6288, 78.1973, 30.2588, -22.6357, -88, 0.421168, -4.02781, 18.8572, 3.83436, 34.1776, -61.2631, 6.71172, 122, 23.9176, -2.97021, -83.9907, -3.82359, -7.82517, -2.72626, 10.0529, -10.6737, -39.35, 2.31607, 110.266, 15.7483, -42.6355, 57.2228, -2.46156, -23.9872, 76.6346, 4.76087, -12.0029, 56.3174, 3.73816, 122, 0, -32.5661, 51.1454, -15, -10.1916, 106.353, -8.4117, 4.65777, 43.4706, -0.258047, 97.6811, 30.8862, -9.31405, 3.0855, -10.627, -11.5369, -21.9725, 3.2335, 116.346, -73.0831, -10, 104.7, trial: 0, score: 1789 trial: 1, score: 2269 trial: 2, score: 1654 trial: 3, score: 1565 trial: 4, score: 1853 Policy 1: 26.8734, -50.5877, -5.64763, -1.33319, -55, 84.78, 6.35516, 55.8846, -49.8709, -3.6288, 78.1973, 30.2588, -22.6357, -88, 0.421168, -4.02781, 18.8572, 3.83436, 34.1776, -61.2631, 6.71172, 122, 23.9176, -2.97021, -83.9907, -3.82359, -7.82517, -2.72626, 10.0529, -10.6737, -39.35, 2.31607, 110.266, 15.7483, -42.6355, 57.2228, -2.46156, -23.9872, 76.6346, 4.76087, -12.0029, 56.3174, 3.73816, 122, 0, -32.5661, 51.1454, -15, -10.1916, 106.353, -8.4117, 4.65777, 43.4706, -0.258047, 97.6811, 30.8862, -9.31405, 3.0855, -10.627, -11.5369, -21.9725, 3.2335, 116.346, -73.0831, -10, 104.7, Average Score: 1826 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.8246, -45.5404, -4.62655, -7.75663, -51.5699, 86.1672, 10.7322, 53.782, -48.2488, -9.40233, 74.8857, 23.5202, -17.9841, -88, 0.28888, -7.56806, 19.6509, 0.773765, 35.4974, -59.8364, 4.92288, 121.289, 27.7172, -4.02037, -84.4954, -9.69676, -5.69967, -3.97945, 7.44529, -11.8351, -45.6731, -3.54224, 106.453, 10.0784, -38.7973, 56.3228, -8.23136, -20.6115, 68.1074, 2.60211, -16.0189, 54.0105, -1.67618, 120.294, 1.15817, -24.288, 49.3659, -12.6354, -19.8751, 112.803, -9.89533, 3.03089, 46.0687, 7.26149, 101.305, 29.6492, -16.4412, 1.42178, -10.9419, -12.1323, -21.4786, 9.09264, 114.294, -63.6845, -5.95374, 107.876, trial: 0, score: 2269 trial: 1, score: 1629 trial: 2, score: 2204 trial: 3, score: 3165 trial: 4, score: 2103 Policy 1: 23.8246, -45.5404, -4.62655, -7.75663, -51.5699, 86.1672, 10.7322, 53.782, -48.2488, -9.40233, 74.8857, 23.5202, -17.9841, -88, 0.28888, -7.56806, 19.6509, 0.773765, 35.4974, -59.8364, 4.92288, 121.289, 27.7172, -4.02037, -84.4954, -9.69676, -5.69967, -3.97945, 7.44529, -11.8351, -45.6731, -3.54224, 106.453, 10.0784, -38.7973, 56.3228, -8.23136, -20.6115, 68.1074, 2.60211, -16.0189, 54.0105, -1.67618, 120.294, 1.15817, -24.288, 49.3659, -12.6354, -19.8751, 112.803, -9.89533, 3.03089, 46.0687, 7.26149, 101.305, 29.6492, -16.4412, 1.42178, -10.9419, -12.1323, -21.4786, 9.09264, 114.294, -63.6845, -5.95374, 107.876, Average Score: 2274 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 28.3486, -45.753, -8.34725, -5.66979, -53.1889, 81.9463, 6.53128, 59.282, -48.2317, -9.487, 73.9575, 28.8395, -23.3378, -87.1954, -6.00846, -3, 20.1236, 6.72599, 35.7802, -63.6369, 11.6941, 117.508, 28.7849, -4.03408, -82.6754, -5.02329, -13.6629, -8.2667, 7.94727, -14.64, -39.0991, -2.19079, 101.851, 15.7594, -41.0225, 55.2499, -4.2224, -25.3641, 75.6864, 8.14111, -10.5028, 61.4454, 3.66334, 122, 0, -25.2823, 59.1106, -15, -12.5518, 104.956, -9.23018, -4.3284, 48.8717, 6.80275, 96.5186, 27.0808, -12.1991, 8.28278, -15, -17.3966, -14.7069, 5.3161, 109.083, -64.9748, -10, 104.085, trial: 0, score: 3709 trial: 1, score: 4598 trial: 2, score: 2942 trial: 3, score: 3991 trial: 4, score: 3903 Policy 1: 28.3486, -45.753, -8.34725, -5.66979, -53.1889, 81.9463, 6.53128, 59.282, -48.2317, -9.487, 73.9575, 28.8395, -23.3378, -87.1954, -6.00846, -3, 20.1236, 6.72599, 35.7802, -63.6369, 11.6941, 117.508, 28.7849, -4.03408, -82.6754, -5.02329, -13.6629, -8.2667, 7.94727, -14.64, -39.0991, -2.19079, 101.851, 15.7594, -41.0225, 55.2499, -4.2224, -25.3641, 75.6864, 8.14111, -10.5028, 61.4454, 3.66334, 122, 0, -25.2823, 59.1106, -15, -12.5518, 104.956, -9.23018, -4.3284, 48.8717, 6.80275, 96.5186, 27.0808, -12.1991, 8.28278, -15, -17.3966, -14.7069, 5.3161, 109.083, -64.9748, -10, 104.085, Average Score: 3828.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 26.6059, -43.6149, -7.53907, -8.96539, -55, 87.2473, 11.7131, 52.7981, -49.7007, -6.29446, 75.1354, 22.4633, -22.2283, -88, -5.85316, -3.40812, 23.4673, 6.2005, 28.7716, -64.1214, 8.63083, 116.717, 23.667, -4.51472, -87.7214, -9.33326, -8.44503, -2.52522, 2.78968, -15.1225, -43.5367, 2.81673, 103.911, 15.4181, -42.8747, 53.3646, -3.38694, -25.8768, 69.6708, 3.71974, -13.3772, 53.2319, 2.3141, 120.494, 0, -32.5709, 55.8661, -15, -12.8982, 112.072, -6.92022, -3.20871, 49.2689, 1.31199, 98.8382, 26.6501, -15.347, 4.90264, -11.9733, -11.6313, -13.9117, 6.77605, 112.843, -72.0957, -6.73237, 104.508, trial: 0, score: 3174 trial: 1, score: 3167 trial: 2, score: 2045 trial: 3, score: 1439 trial: 4, score: 2655 Policy 1: 26.6059, -43.6149, -7.53907, -8.96539, -55, 87.2473, 11.7131, 52.7981, -49.7007, -6.29446, 75.1354, 22.4633, -22.2283, -88, -5.85316, -3.40812, 23.4673, 6.2005, 28.7716, -64.1214, 8.63083, 116.717, 23.667, -4.51472, -87.7214, -9.33326, -8.44503, -2.52522, 2.78968, -15.1225, -43.5367, 2.81673, 103.911, 15.4181, -42.8747, 53.3646, -3.38694, -25.8768, 69.6708, 3.71974, -13.3772, 53.2319, 2.3141, 120.494, 0, -32.5709, 55.8661, -15, -12.8982, 112.072, -6.92022, -3.20871, 49.2689, 1.31199, 98.8382, 26.6501, -15.347, 4.90264, -11.9733, -11.6313, -13.9117, 6.77605, 112.843, -72.0957, -6.73237, 104.508, Average Score: 2496 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 21.5136, -47.9066, -7.5608, -1.99258, -50.8047, 84.3094, 7.35495, 56.5819, -48.7026, -10, 81.9252, 21.9427, -14.2548, -84.2259, 2.9709, -7.16766, 20.6726, 6.62071, 30.3534, -60.4682, 10.5238, 118.367, 29.0055, -4.68391, -87.4211, -5.52965, -11.657, -0.746294, 8.61024, -11.261, -39.4904, 5.16537, 103.623, 13.4175, -33.7625, 55.9771, 0.238055, -26.9393, 72.9662, 2.75583, -9.07024, 55.2059, 4.44517, 119.837, 0.838725, -30.4252, 51.4817, -11.9325, -13.4464, 107.03, -5.85743, -1.73614, 51.9046, 0.786919, 98.0386, 33.0992, -8.06671, 5.78019, -14.769, -18.4837, -17.922, 5.20124, 107.577, -63.3903, -10, 110.473, trial: 0, score: 2301 trial: 1, score: 2173 trial: 2, score: 1693 trial: 3, score: 2653 trial: 4, score: 3389 Policy 1: 21.5136, -47.9066, -7.5608, -1.99258, -50.8047, 84.3094, 7.35495, 56.5819, -48.7026, -10, 81.9252, 21.9427, -14.2548, -84.2259, 2.9709, -7.16766, 20.6726, 6.62071, 30.3534, -60.4682, 10.5238, 118.367, 29.0055, -4.68391, -87.4211, -5.52965, -11.657, -0.746294, 8.61024, -11.261, -39.4904, 5.16537, 103.623, 13.4175, -33.7625, 55.9771, 0.238055, -26.9393, 72.9662, 2.75583, -9.07024, 55.2059, 4.44517, 119.837, 0.838725, -30.4252, 51.4817, -11.9325, -13.4464, 107.03, -5.85743, -1.73614, 51.9046, 0.786919, 98.0386, 33.0992, -8.06671, 5.78019, -14.769, -18.4837, -17.922, 5.20124, 107.577, -63.3903, -10, 110.473, Average Score: 2441.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.7191, -43.2588, -2.95764, -9.43685, -55, 80.4916, 10.6067, 56.4703, -45.2105, -3.06888, 74.8745, 24.5661, -16.2377, -88, 0.660103, -3, 17.6147, 4.37488, 33.2045, -67.8521, 11.7214, 118.773, 24.7848, -1.21004, -82.7599, -6.80971, -8.63256, -3.76709, 7.23402, -13.6135, -36.3911, 1.61626, 108.006, 10.1006, -42.0219, 51.9565, -2.78287, -28.1835, 72.9174, 4.94756, -9.75331, 54.033, 2.5215, 122, 0, -29.7625, 56.949, -11.3401, -11.2129, 111.499, -3.51938, -4.91418, 44.8843, 5.34038, 95.6101, 27.9718, -7.70516, 2.98756, -10.727, -15.4229, -13.5284, 8.34103, 115.489, -64.7664, -10, 107.051, trial: 0, score: 3933 trial: 1, score: 4479 trial: 2, score: 4023 trial: 3, score: 4701 trial: 4, score: 4255 Policy 1: 27.7191, -43.2588, -2.95764, -9.43685, -55, 80.4916, 10.6067, 56.4703, -45.2105, -3.06888, 74.8745, 24.5661, -16.2377, -88, 0.660103, -3, 17.6147, 4.37488, 33.2045, -67.8521, 11.7214, 118.773, 24.7848, -1.21004, -82.7599, -6.80971, -8.63256, -3.76709, 7.23402, -13.6135, -36.3911, 1.61626, 108.006, 10.1006, -42.0219, 51.9565, -2.78287, -28.1835, 72.9174, 4.94756, -9.75331, 54.033, 2.5215, 122, 0, -29.7625, 56.949, -11.3401, -11.2129, 111.499, -3.51938, -4.91418, 44.8843, 5.34038, 95.6101, 27.9718, -7.70516, 2.98756, -10.727, -15.4229, -13.5284, 8.34103, 115.489, -64.7664, -10, 107.051, Average Score: 4278.2 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 21.7773, -48.6625, -7.81258, -2.40924, -53.1054, 84.9118, 14.727, 49.816, -52.4952, -10, 75.8768, 21.895, -17.6629, -86.7045, -2.19158, -7.56858, 22.4541, 2.78994, 31.0481, -59.7806, 10.2877, 120.843, 20.2584, 0, -85.0921, -11.791, -13.5922, -4.9691, 0.657726, -14.8508, -36.1486, 5.64441, 105.261, 11.37, -41.4842, 51.2384, -0.713257, -20.9075, 73.6206, 4.18749, -7.62081, 61.6346, -2.40328, 122, 0, -29.4828, 58.198, -14.6895, -19.9875, 113.274, -2.77958, 2.78423, 48.3544, 6.77276, 101.616, 33.7995, -12.0864, 7.67433, -13.2792, -19.1004, -17.3695, 4.04344, 112.761, -68.2172, -7.15257, 108.25, trial: 0, score: 1501 trial: 1, score: 2077 trial: 2, score: 1951 trial: 3, score: 3359 trial: 4, score: 2301 Policy 1: 21.7773, -48.6625, -7.81258, -2.40924, -53.1054, 84.9118, 14.727, 49.816, -52.4952, -10, 75.8768, 21.895, -17.6629, -86.7045, -2.19158, -7.56858, 22.4541, 2.78994, 31.0481, -59.7806, 10.2877, 120.843, 20.2584, 0, -85.0921, -11.791, -13.5922, -4.9691, 0.657726, -14.8508, -36.1486, 5.64441, 105.261, 11.37, -41.4842, 51.2384, -0.713257, -20.9075, 73.6206, 4.18749, -7.62081, 61.6346, -2.40328, 122, 0, -29.4828, 58.198, -14.6895, -19.9875, 113.274, -2.77958, 2.78423, 48.3544, 6.77276, 101.616, 33.7995, -12.0864, 7.67433, -13.2792, -19.1004, -17.3695, 4.04344, 112.761, -68.2172, -7.15257, 108.25, Average Score: 2237.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.2212, -42.6297, -7.03197, -1.62165, -49.085, 83.9049, 9.74151, 57.6472, -50.1868, -3.12766, 73.7895, 29.2093, -21.1387, -88, -4.65734, -4.45091, 21.331, 0.0801984, 36.7385, -60.6333, 7.90133, 113.36, 24.4723, -4.89624, -85.0287, -10.2196, -11.4287, -6.40249, 10.0071, -12.842, -36.8983, -1.15922, 103.419, 14.2309, -39.6511, 59.6142, -5.30976, -20.886, 76.1244, 1.29056, -7.76307, 61.1592, -3.46777, 122, 0.838972, -31.147, 58.8318, -15, -13.0594, 106.708, -9.54571, -2.78507, 43.97, 7.83881, 100.501, 26.0875, -12.9534, 3.72202, -12.0102, -13.236, -14.3763, 5.68287, 113.17, -65.8107, -10, 113.34, trial: 0, score: 2326 trial: 1, score: 2781 trial: 2, score: 1821 trial: 3, score: 1952 trial: 4, score: 1726 Policy 1: 20.2212, -42.6297, -7.03197, -1.62165, -49.085, 83.9049, 9.74151, 57.6472, -50.1868, -3.12766, 73.7895, 29.2093, -21.1387, -88, -4.65734, -4.45091, 21.331, 0.0801984, 36.7385, -60.6333, 7.90133, 113.36, 24.4723, -4.89624, -85.0287, -10.2196, -11.4287, -6.40249, 10.0071, -12.842, -36.8983, -1.15922, 103.419, 14.2309, -39.6511, 59.6142, -5.30976, -20.886, 76.1244, 1.29056, -7.76307, 61.1592, -3.46777, 122, 0.838972, -31.147, 58.8318, -15, -13.0594, 106.708, -9.54571, -2.78507, 43.97, 7.83881, 100.501, 26.0875, -12.9534, 3.72202, -12.0102, -13.236, -14.3763, 5.68287, 113.17, -65.8107, -10, 113.34, Average Score: 2121.2 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.7796, -46.2041, -3.04082, -8.21383, -53.8333, 80.4539, 9.82809, 53.7427, -50.9359, -4.83617, 79.8183, 26.6064, -16.4755, -88, -1.59578, -3, 25.4614, 0.563688, 31.0287, -63.002, 9.7245, 117.684, 20.5573, 0, -88, -3.48795, -6.43626, -6.94627, 2.64128, -11.9649, -38.8464, 2.5576, 105.561, 15.8712, -37.741, 52.2893, -1.66915, -19.69, 70.7178, 2.01895, -11.6812, 58.3246, 1.35566, 120.1, 4.0646, -28.905, 50.5293, -15, -13.5001, 105.447, -7.86188, 0.714222, 45.4668, 3.26624, 99.8678, 29.3648, -8.40556, 9.05953, -13.8204, -17.5519, -18.515, 8.15823, 114.85, -66.9693, -10, 105.203, trial: 0, score: 3421 trial: 1, score: 3774 trial: 2, score: 4413 trial: 3, score: 3871 trial: 4, score: 4445 Policy 1: 23.7796, -46.2041, -3.04082, -8.21383, -53.8333, 80.4539, 9.82809, 53.7427, -50.9359, -4.83617, 79.8183, 26.6064, -16.4755, -88, -1.59578, -3, 25.4614, 0.563688, 31.0287, -63.002, 9.7245, 117.684, 20.5573, 0, -88, -3.48795, -6.43626, -6.94627, 2.64128, -11.9649, -38.8464, 2.5576, 105.561, 15.8712, -37.741, 52.2893, -1.66915, -19.69, 70.7178, 2.01895, -11.6812, 58.3246, 1.35566, 120.1, 4.0646, -28.905, 50.5293, -15, -13.5001, 105.447, -7.86188, 0.714222, 45.4668, 3.26624, 99.8678, 29.3648, -8.40556, 9.05953, -13.8204, -17.5519, -18.515, 8.15823, 114.85, -66.9693, -10, 105.203, Average Score: 3984.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 29.0845, -47.6985, -2.77488, -6.66719, -55, 88.0352, 13.4959, 57.1561, -50.1886, -7.64583, 72.6963, 22.8669, -14.0292, -84.7808, 0.49918, -3.41025, 20.7478, 8.7716, 36.4935, -67.2319, 7.33493, 122, 21.582, -4.77935, -85.3751, -9.91736, -12.2171, -4.61624, 9.9054, -12.2831, -44.57, -3.15354, 103.594, 18.3627, -40.1297, 56.5976, 1.0894, -21.9527, 68.4032, -0.928908, -7.20925, 59.3031, -3.37202, 117.166, 3.73283, -30.5314, 59.0742, -11.6543, -12.6988, 108.783, -6.2016, 3.77415, 51.5424, -0.560441, 96.5427, 25.4236, -10.7564, 2.14639, -12.9377, -12.0116, -15.4091, 0.374363, 106.846, -70.1554, -10, 111.508, trial: 0, score: 3742 trial: 1, score: 3799 trial: 2, score: 3581 trial: 3, score: 3486 trial: 4, score: 2429 Policy 1: 29.0845, -47.6985, -2.77488, -6.66719, -55, 88.0352, 13.4959, 57.1561, -50.1886, -7.64583, 72.6963, 22.8669, -14.0292, -84.7808, 0.49918, -3.41025, 20.7478, 8.7716, 36.4935, -67.2319, 7.33493, 122, 21.582, -4.77935, -85.3751, -9.91736, -12.2171, -4.61624, 9.9054, -12.2831, -44.57, -3.15354, 103.594, 18.3627, -40.1297, 56.5976, 1.0894, -21.9527, 68.4032, -0.928908, -7.20925, 59.3031, -3.37202, 117.166, 3.73283, -30.5314, 59.0742, -11.6543, -12.6988, 108.783, -6.2016, 3.77415, 51.5424, -0.560441, 96.5427, 25.4236, -10.7564, 2.14639, -12.9377, -12.0116, -15.4091, 0.374363, 106.846, -70.1554, -10, 111.508, Average Score: 3407.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 22.1163, -43.5045, -0.461807, -5.1096, -53.1705, 78.5212, 11.4956, 53.6646, -49.5505, -10, 74.2853, 24.0451, -16.3146, -88, -3.65752, -3, 18.3347, 7.59511, 35.9412, -62.5007, 3.17613, 115.754, 27.0701, -3.63556, -85.0892, -11.5265, -5.70987, -2.84122, 4.86793, -13.0776, -39.839, 4.4137, 110.662, 9.91096, -42.5296, 50.7502, -3.43659, -27.7284, 69.2361, 7.07343, -15.9518, 57.5246, 3.23613, 122, 0.205127, -31.3122, 50.431, -15, -18.4116, 108.678, -1.47467, -1.13059, 48.7891, 6.54115, 95.8566, 29.2482, -10.2606, 5.42293, -15, -11.5303, -14.3604, 9.89476, 116.685, -64.7302, -9.5073, 110.3, trial: 0, score: 2678 trial: 1, score: 3933 trial: 2, score: 2718 trial: 3, score: 2846 trial: 4, score: 4574 Policy 1: 22.1163, -43.5045, -0.461807, -5.1096, -53.1705, 78.5212, 11.4956, 53.6646, -49.5505, -10, 74.2853, 24.0451, -16.3146, -88, -3.65752, -3, 18.3347, 7.59511, 35.9412, -62.5007, 3.17613, 115.754, 27.0701, -3.63556, -85.0892, -11.5265, -5.70987, -2.84122, 4.86793, -13.0776, -39.839, 4.4137, 110.662, 9.91096, -42.5296, 50.7502, -3.43659, -27.7284, 69.2361, 7.07343, -15.9518, 57.5246, 3.23613, 122, 0.205127, -31.3122, 50.431, -15, -18.4116, 108.678, -1.47467, -1.13059, 48.7891, 6.54115, 95.8566, 29.2482, -10.2606, 5.42293, -15, -11.5303, -14.3604, 9.89476, 116.685, -64.7302, -9.5073, 110.3, Average Score: 3349.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.1917, -48.0686, 0.343752, -7.41394, -54.1486, 83.1939, 15.2851, 50.1554, -51.1978, -10, 76.4395, 29.1802, -23.0987, -88, -0.544895, -3, 17.4249, 6.12012, 31.6896, -60.5942, 2.90784, 115.069, 25.6358, 0, -85.3271, -5.32165, -7.17494, -1.32589, 6.51561, -7.98688, -45.5713, 2.80508, 101.49, 18.213, -39.7874, 59.307, -0.670464, -21.9304, 68.7569, 0.0201019, -11.6844, 59.2778, 2.80813, 122, 0, -31.2349, 58.4271, -10.9951, -14.0931, 105.033, -9.57937, -1.89546, 52.9228, 7.9141, 96.9381, 27.6239, -12.0708, 9.51515, -11.5483, -16.7429, -17.7383, 0.262765, 115.996, -65.0659, -5.05284, 110.283, trial: 0, score: 2205 trial: 1, score: 1821 trial: 2, score: 2047 trial: 3, score: 1981 trial: 4, score: 1725 Policy 1: 23.1917, -48.0686, 0.343752, -7.41394, -54.1486, 83.1939, 15.2851, 50.1554, -51.1978, -10, 76.4395, 29.1802, -23.0987, -88, -0.544895, -3, 17.4249, 6.12012, 31.6896, -60.5942, 2.90784, 115.069, 25.6358, 0, -85.3271, -5.32165, -7.17494, -1.32589, 6.51561, -7.98688, -45.5713, 2.80508, 101.49, 18.213, -39.7874, 59.307, -0.670464, -21.9304, 68.7569, 0.0201019, -11.6844, 59.2778, 2.80813, 122, 0, -31.2349, 58.4271, -10.9951, -14.0931, 105.033, -9.57937, -1.89546, 52.9228, 7.9141, 96.9381, 27.6239, -12.0708, 9.51515, -11.5483, -16.7429, -17.7383, 0.262765, 115.996, -65.0659, -5.05284, 110.283, Average Score: 1955.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 26.2192, -46.0002, -8.22981, -0.864054, -55, 81.2659, 11.8608, 58.2169, -47.4961, -7.05084, 78.0615, 29.0542, -14.1111, -83.5302, -5.91359, -3, 18.8028, 0.532379, 31.2678, -68.2467, 4.65595, 115.371, 25.1079, -0.966801, -84.1114, -12.1129, -7.24612, -3.59253, 6.20851, -16.8693, -38.7556, 4.70136, 104.144, 16.2457, -33.8814, 56.275, -5.29186, -19.3201, 76.7924, 3.28989, -9.7673, 55.3026, -2.26587, 122, 3.60185, -31.5109, 53.0164, -14.5797, -10.8677, 109.199, -4.48577, -2.99992, 50.9279, 3.0995, 102.95, 24.7725, -15.5118, 5.82828, -15, -13.4385, -15.4688, 1.61255, 110.46, -70.4815, -5.85172, 105.331, trial: 0, score: 4029 trial: 1, score: 3261 trial: 2, score: 4765 trial: 3, score: 5053 trial: 4, score: 4959 Policy 1: 26.2192, -46.0002, -8.22981, -0.864054, -55, 81.2659, 11.8608, 58.2169, -47.4961, -7.05084, 78.0615, 29.0542, -14.1111, -83.5302, -5.91359, -3, 18.8028, 0.532379, 31.2678, -68.2467, 4.65595, 115.371, 25.1079, -0.966801, -84.1114, -12.1129, -7.24612, -3.59253, 6.20851, -16.8693, -38.7556, 4.70136, 104.144, 16.2457, -33.8814, 56.275, -5.29186, -19.3201, 76.7924, 3.28989, -9.7673, 55.3026, -2.26587, 122, 3.60185, -31.5109, 53.0164, -14.5797, -10.8677, 109.199, -4.48577, -2.99992, 50.9279, 3.0995, 102.95, 24.7725, -15.5118, 5.82828, -15, -13.4385, -15.4688, 1.61255, 110.46, -70.4815, -5.85172, 105.331, Average Score: 4413.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.1215, -45.2646, -0.82969, -3.07469, -51.6901, 81.6477, 11.6429, 53.864, -53.339, -10, 73.5194, 25.997, -20.7058, -88, -0.921391, -4.53389, 16.1532, -0.0686475, 32.6057, -62.0058, 11.0407, 120.402, 21.4698, 0, -88, -11.9007, -9.71418, -6.3764, 8.10225, -17.1933, -44.4927, -0.367244, 107.393, 11.4933, -37.5377, 51.5857, -3.34207, -27.1972, 70.7871, 2.81941, -7.80267, 59.1067, -3.03945, 122, 0, -33.6128, 53.1993, -15, -10.2708, 111.76, -9.30878, 4.48314, 44.7979, 3.61155, 101.292, 26.1947, -12.031, 8.59426, -14.886, -12.1064, -12.3775, 5.80885, 107.105, -72.713, -8.56532, 106.964, trial: 0, score: 2430 trial: 1, score: 2678 trial: 2, score: 1565 trial: 3, score: 3935 trial: 4, score: 3702 Policy 1: 20.1215, -45.2646, -0.82969, -3.07469, -51.6901, 81.6477, 11.6429, 53.864, -53.339, -10, 73.5194, 25.997, -20.7058, -88, -0.921391, -4.53389, 16.1532, -0.0686475, 32.6057, -62.0058, 11.0407, 120.402, 21.4698, 0, -88, -11.9007, -9.71418, -6.3764, 8.10225, -17.1933, -44.4927, -0.367244, 107.393, 11.4933, -37.5377, 51.5857, -3.34207, -27.1972, 70.7871, 2.81941, -7.80267, 59.1067, -3.03945, 122, 0, -33.6128, 53.1993, -15, -10.2708, 111.76, -9.30878, 4.48314, 44.7979, 3.61155, 101.292, 26.1947, -12.031, 8.59426, -14.886, -12.1064, -12.3775, 5.80885, 107.105, -72.713, -8.56532, 106.964, Average Score: 2862 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 28.963, -50.2412, -7.00934, -8.58686, -52.3927, 86.7037, 11.4123, 59.1598, -49.5853, -5.21074, 76.284, 29.8056, -20.3541, -88, 0.22599, -3, 20.0038, 2.4008, 35.4453, -69.2378, 9.67124, 115.464, 28.1508, -1.98719, -88, -12.5887, -7.48404, -2.27334, 5.63241, -14.444, -42.9935, 4.77366, 107.253, 18.4444, -33.4893, 51.9528, -1.32547, -27.678, 69.1552, -1.60139, -12.0511, 54.3501, -2.42117, 122, 0, -26.8204, 57.6061, -12.7155, -19.1133, 106.973, -2.24063, -1.28193, 47.9894, -0.285866, 96.791, 33.5842, -9.59004, 2.55216, -15, -14.5618, -19.6215, 0.333228, 116.031, -72.3856, -10, 112.128, trial: 0, score: 3869 trial: 1, score: 4478 trial: 2, score: 3743 trial: 3, score: 3903 trial: 4, score: 4189 Policy 1: 28.963, -50.2412, -7.00934, -8.58686, -52.3927, 86.7037, 11.4123, 59.1598, -49.5853, -5.21074, 76.284, 29.8056, -20.3541, -88, 0.22599, -3, 20.0038, 2.4008, 35.4453, -69.2378, 9.67124, 115.464, 28.1508, -1.98719, -88, -12.5887, -7.48404, -2.27334, 5.63241, -14.444, -42.9935, 4.77366, 107.253, 18.4444, -33.4893, 51.9528, -1.32547, -27.678, 69.1552, -1.60139, -12.0511, 54.3501, -2.42117, 122, 0, -26.8204, 57.6061, -12.7155, -19.1133, 106.973, -2.24063, -1.28193, 47.9894, -0.285866, 96.791, 33.5842, -9.59004, 2.55216, -15, -14.5618, -19.6215, 0.333228, 116.031, -72.3856, -10, 112.128, Average Score: 4036.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 22.822, -44.8134, -5.13536, -1.77728, -53.8941, 84.3538, 7.00076, 58.5065, -48.8168, -3.84296, 75.294, 23.9407, -17.0776, -83.3888, -4.85695, -5.51817, 19.2009, 1.57554, 34.6533, -61.4092, 2.92526, 118.548, 22.2296, -0.419903, -83.3292, -11.9094, -10.5765, -1.72228, 4.23971, -14.5197, -36.1631, -3.01986, 105.633, 10.7906, -36.0238, 54.9828, -4.38698, -28.612, 75.2549, -0.391835, -12.3037, 55.7556, -4.02027, 122, 3.70355, -27.0131, 57.0013, -15, -12.0673, 105.639, -3.63808, 4.20767, 45.8363, 0.673812, 102.573, 25.7638, -10.9382, 4.73194, -10.5619, -15.4783, -15.0803, 5.39995, 110.333, -70.5133, -10, 106.242, trial: 0, score: 3494 trial: 1, score: 1982 trial: 2, score: 2046 trial: 3, score: 1878 trial: 4, score: 1600 Policy 1: 22.822, -44.8134, -5.13536, -1.77728, -53.8941, 84.3538, 7.00076, 58.5065, -48.8168, -3.84296, 75.294, 23.9407, -17.0776, -83.3888, -4.85695, -5.51817, 19.2009, 1.57554, 34.6533, -61.4092, 2.92526, 118.548, 22.2296, -0.419903, -83.3292, -11.9094, -10.5765, -1.72228, 4.23971, -14.5197, -36.1631, -3.01986, 105.633, 10.7906, -36.0238, 54.9828, -4.38698, -28.612, 75.2549, -0.391835, -12.3037, 55.7556, -4.02027, 122, 3.70355, -27.0131, 57.0013, -15, -12.0673, 105.639, -3.63808, 4.20767, 45.8363, 0.673812, 102.573, 25.7638, -10.9382, 4.73194, -10.5619, -15.4783, -15.0803, 5.39995, 110.333, -70.5133, -10, 106.242, Average Score: 2200 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.7164, -45.0904, 0.617288, -6.60898, -52.8004, 79.9152, 5.67072, 49.8992, -45.3227, -2.10559, 78.9793, 29.9537, -21.5946, -88, -1.15904, -3, 25.7399, 4.74402, 34.3665, -65.4417, 7.87546, 116.385, 27.2315, 0, -88, -3.24308, -6.85599, 0.371214, 3.7903, -14.1812, -44.4336, 3.12192, 103.748, 11.9353, -42.8227, 58.2138, -4.68898, -22.1726, 68.2481, -1.34896, -16.3932, 61.6138, 2.92792, 120.662, 4.32195, -29.1588, 58.965, -14.8259, -11.3672, 112.09, -8.22177, 4.92034, 46.8171, 2.68922, 95.4314, 34.3725, -9.82615, 2.93725, -15, -16.4651, -15.6304, 6.6816, 110.449, -67.9019, -7.93596, 110.299, trial: 0, score: 3031 trial: 1, score: 2845 trial: 2, score: 1982 trial: 3, score: 1695 trial: 4, score: 4061 Policy 1: 23.7164, -45.0904, 0.617288, -6.60898, -52.8004, 79.9152, 5.67072, 49.8992, -45.3227, -2.10559, 78.9793, 29.9537, -21.5946, -88, -1.15904, -3, 25.7399, 4.74402, 34.3665, -65.4417, 7.87546, 116.385, 27.2315, 0, -88, -3.24308, -6.85599, 0.371214, 3.7903, -14.1812, -44.4336, 3.12192, 103.748, 11.9353, -42.8227, 58.2138, -4.68898, -22.1726, 68.2481, -1.34896, -16.3932, 61.6138, 2.92792, 120.662, 4.32195, -29.1588, 58.965, -14.8259, -11.3672, 112.09, -8.22177, 4.92034, 46.8171, 2.68922, 95.4314, 34.3725, -9.82615, 2.93725, -15, -16.4651, -15.6304, 6.6816, 110.449, -67.9019, -7.93596, 110.299, Average Score: 2722.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 23.7205, -49.3432, -0.940347, -6.26322, -53.4009, 79.7143, 7.06179, 52.4163, -50.5759, -5.09983, 79.6856, 24.612, -20.3241, -88, -1.09452, -3, 25.0328, 8.51855, 36.3404, -59.8031, 5.09981, 120.948, 19.8105, -5.6305, -84.4824, -12.6304, -11.7488, 0.989441, 7.31555, -14.3029, -45.5674, 6.2789, 109.269, 14.9158, -39.5453, 56.6906, -4.47143, -19.7575, 75.2341, 6.19971, -16.2658, 58.0142, -0.286718, 122, 1.29293, -28.333, 56.3768, -12.2777, -15.1101, 112.472, -8.30809, 2.79505, 46.4258, 7.88222, 97.0227, 25.5354, -16.9943, 10.1829, -11.2737, -14.9371, -12.7936, 5.58828, 114.697, -63.9508, -10, 111.464, trial: 0, score: 2504 trial: 1, score: 2141 trial: 2, score: 1757 trial: 3, score: 1917 trial: 4, score: 2493 Policy 1: 23.7205, -49.3432, -0.940347, -6.26322, -53.4009, 79.7143, 7.06179, 52.4163, -50.5759, -5.09983, 79.6856, 24.612, -20.3241, -88, -1.09452, -3, 25.0328, 8.51855, 36.3404, -59.8031, 5.09981, 120.948, 19.8105, -5.6305, -84.4824, -12.6304, -11.7488, 0.989441, 7.31555, -14.3029, -45.5674, 6.2789, 109.269, 14.9158, -39.5453, 56.6906, -4.47143, -19.7575, 75.2341, 6.19971, -16.2658, 58.0142, -0.286718, 122, 1.29293, -28.333, 56.3768, -12.2777, -15.1101, 112.472, -8.30809, 2.79505, 46.4258, 7.88222, 97.0227, 25.5354, -16.9943, 10.1829, -11.2737, -14.9371, -12.7936, 5.58828, 114.697, -63.9508, -10, 111.464, Average Score: 2162.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 29.0668, -49.134, -0.26705, -6.16095, -55, 78.9424, 8.55602, 51.8083, -52.4681, -9.78984, 73.369, 28.0336, -19.8498, -88, -3.07047, -3, 21.8387, 1.16278, 33.2399, -67.2394, 5.58959, 121.016, 20.274, 0, -86.6996, -4.99888, -11.3815, 1.18863, 4.29542, -10.5969, -39.6403, -0.635161, 104.45, 16.3423, -33.233, 58.8478, -1.38947, -19.4802, 70.478, -0.935442, -9.68228, 61.674, 3.64072, 122, 0.931982, -29.2554, 53.9056, -12.2004, -19.903, 110.206, -8.42338, 0.279738, 48.1572, 7.00875, 101.812, 27.8528, -10.7933, 5.12499, -15, -16.3427, -21.6231, 7.02221, 109.892, -63.7202, -10, 112.409, trial: 0, score: 3485 trial: 1, score: 2397 trial: 2, score: 3741 trial: 3, score: 1918 trial: 4, score: 2589 Policy 1: 29.0668, -49.134, -0.26705, -6.16095, -55, 78.9424, 8.55602, 51.8083, -52.4681, -9.78984, 73.369, 28.0336, -19.8498, -88, -3.07047, -3, 21.8387, 1.16278, 33.2399, -67.2394, 5.58959, 121.016, 20.274, 0, -86.6996, -4.99888, -11.3815, 1.18863, 4.29542, -10.5969, -39.6403, -0.635161, 104.45, 16.3423, -33.233, 58.8478, -1.38947, -19.4802, 70.478, -0.935442, -9.68228, 61.674, 3.64072, 122, 0.931982, -29.2554, 53.9056, -12.2004, -19.903, 110.206, -8.42338, 0.279738, 48.1572, 7.00875, 101.812, 27.8528, -10.7933, 5.12499, -15, -16.3427, -21.6231, 7.02221, 109.892, -63.7202, -10, 112.409, Average Score: 2826 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 26.2162, -50.3581, 0.218147, -1.28431, -50.7606, 83.5639, 8.61133, 56.6002, -44.4029, -2.18281, 78.4727, 25.6517, -20.2519, -88, 0.934035, -5.2906, 22.1183, 9.46125, 36.2735, -67.4181, 11.0981, 113.658, 21.1044, 0, -85.8164, -3.51535, -5.59112, 0.0455048, 6.95598, -8.35736, -38.0228, 0.742913, 108.014, 17.3178, -38.7762, 56.9732, -1.36868, -20.6842, 75.4331, -0.112162, -12.8762, 59.7669, 3.40466, 122, 1.0445, -33.0866, 51.0051, -11.5433, -13.7036, 107.788, -6.05687, 0.65257, 49.9144, -0.0188637, 94.8264, 33.9084, -13.9412, 6.44064, -15, -10.882, -19.0745, 7.2782, 113.556, -66.1021, -7.33981, 107.01, trial: 0, score: 4413 trial: 1, score: 4541 trial: 2, score: 4990 trial: 3, score: 4798 trial: 4, score: 4799 Policy 1: 26.2162, -50.3581, 0.218147, -1.28431, -50.7606, 83.5639, 8.61133, 56.6002, -44.4029, -2.18281, 78.4727, 25.6517, -20.2519, -88, 0.934035, -5.2906, 22.1183, 9.46125, 36.2735, -67.4181, 11.0981, 113.658, 21.1044, 0, -85.8164, -3.51535, -5.59112, 0.0455048, 6.95598, -8.35736, -38.0228, 0.742913, 108.014, 17.3178, -38.7762, 56.9732, -1.36868, -20.6842, 75.4331, -0.112162, -12.8762, 59.7669, 3.40466, 122, 1.0445, -33.0866, 51.0051, -11.5433, -13.7036, 107.788, -6.05687, 0.65257, 49.9144, -0.0188637, 94.8264, 33.9084, -13.9412, 6.44064, -15, -10.882, -19.0745, 7.2782, 113.556, -66.1021, -7.33981, 107.01, Average Score: 4708.2 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 20.4047, -48.367, -2.36111, -4.41859, -55, 86.8455, 12.7447, 57.0355, -49.6694, -6.00298, 75.9347, 23.0734, -23.2406, -83.6555, 2.0463, -4.25751, 19.0108, 2.56311, 28.6126, -65.6095, 9.58197, 113.846, 21.5717, 0, -88, -5.83459, -12.7774, 1.56188, 6.9343, -9.12918, -45.93, 6.29647, 104.497, 19.1388, -33.8266, 56.9862, -6.9459, -24.8104, 69.3351, 3.88297, -14.2697, 58.5941, 2.22983, 122, 3.72169, -29.1972, 50.7822, -13.45, -13.7051, 109.437, -0.625525, -2.25047, 52.5165, 8.81081, 97.5075, 28.6495, -11.7717, 4.85908, -13.5129, -16.6526, -13.5544, 2.11589, 116.266, -63.555, -10, 113.619, trial: 0, score: 5110 trial: 1, score: 4158 trial: 2, score: 4503 trial: 3, score: 3359 trial: 4, score: 4637 Policy 1: 20.4047, -48.367, -2.36111, -4.41859, -55, 86.8455, 12.7447, 57.0355, -49.6694, -6.00298, 75.9347, 23.0734, -23.2406, -83.6555, 2.0463, -4.25751, 19.0108, 2.56311, 28.6126, -65.6095, 9.58197, 113.846, 21.5717, 0, -88, -5.83459, -12.7774, 1.56188, 6.9343, -9.12918, -45.93, 6.29647, 104.497, 19.1388, -33.8266, 56.9862, -6.9459, -24.8104, 69.3351, 3.88297, -14.2697, 58.5941, 2.22983, 122, 3.72169, -29.1972, 50.7822, -13.45, -13.7051, 109.437, -0.625525, -2.25047, 52.5165, 8.81081, 97.5075, 28.6495, -11.7717, 4.85908, -13.5129, -16.6526, -13.5544, 2.11589, 116.266, -63.555, -10, 113.619, Average Score: 4353.4 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 19.5058, -44.1755, -5.28153, -8.89337, -55, 79.9561, 5.91003, 50.201, -43.6093, -2.77795, 79.9739, 27.2574, -17.8315, -88, 0.458837, -3, 19.313, 9.16329, 28.0364, -67.1149, 3.95382, 113.523, 22.8995, 0, -88, -10.5724, -13.0703, -8.16822, 9.02646, -11.1753, -36.6467, -3.23407, 102.756, 16.4957, -34.5334, 60.3471, -5.65302, -22.3296, 74.5141, -0.25633, -15.4956, 61.4461, -0.790478, 119.015, 0, -30.1848, 52.1942, -15, -19.531, 111.929, -0.390871, 1.13334, 47.9482, 2.18653, 94.734, 26.3112, -10.6521, 2.7712, -15, -16.8396, -21.8755, 7.19942, 109.951, -67.2358, -10, 113.569, trial: 0, score: 4733 trial: 1, score: 4766 trial: 2, score: 4574 trial: 3, score: 4831 trial: 4, score: 4159 Policy 1: 19.5058, -44.1755, -5.28153, -8.89337, -55, 79.9561, 5.91003, 50.201, -43.6093, -2.77795, 79.9739, 27.2574, -17.8315, -88, 0.458837, -3, 19.313, 9.16329, 28.0364, -67.1149, 3.95382, 113.523, 22.8995, 0, -88, -10.5724, -13.0703, -8.16822, 9.02646, -11.1753, -36.6467, -3.23407, 102.756, 16.4957, -34.5334, 60.3471, -5.65302, -22.3296, 74.5141, -0.25633, -15.4956, 61.4461, -0.790478, 119.015, 0, -30.1848, 52.1942, -15, -19.531, 111.929, -0.390871, 1.13334, 47.9482, 2.18653, 94.734, 26.3112, -10.6521, 2.7712, -15, -16.8396, -21.8755, 7.19942, 109.951, -67.2358, -10, 113.569, Average Score: 4612.6 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.3583, -45.6285, -6.22537, -5.87705, -49.297, 79.0864, 8.9437, 54.6683, -49.0119, -5.68903, 72.36, 26.3337, -15.3612, -83.1059, 2.87586, -7.11865, 21.6399, 8.30452, 28.0826, -69.2176, 4.43293, 121.116, 19.5374, -1.49105, -84.7793, -3.48872, -13.9582, -4.03447, 6.45185, -11.9507, -44.4714, 0.395029, 107.201, 17.3556, -37.6568, 58.7203, -4.13679, -27.6244, 76.4348, -0.0928072, -16.3154, 56.5353, 1.23488, 117.338, 1.62466, -32.0443, 56.8833, -10.6318, -17.0588, 107.922, -5.92596, -1.8396, 45.313, 7.15311, 99.6224, 24.7215, -10.6054, 10.5577, -13.2711, -13.0274, -15.0738, 1.07196, 108.984, -65.1426, -10, 104.12, trial: 0, score: 4253 trial: 1, score: 4317 trial: 2, score: 4319 trial: 3, score: 4637 trial: 4, score: 4343 Policy 1: 27.3583, -45.6285, -6.22537, -5.87705, -49.297, 79.0864, 8.9437, 54.6683, -49.0119, -5.68903, 72.36, 26.3337, -15.3612, -83.1059, 2.87586, -7.11865, 21.6399, 8.30452, 28.0826, -69.2176, 4.43293, 121.116, 19.5374, -1.49105, -84.7793, -3.48872, -13.9582, -4.03447, 6.45185, -11.9507, -44.4714, 0.395029, 107.201, 17.3556, -37.6568, 58.7203, -4.13679, -27.6244, 76.4348, -0.0928072, -16.3154, 56.5353, 1.23488, 117.338, 1.62466, -32.0443, 56.8833, -10.6318, -17.0588, 107.922, -5.92596, -1.8396, 45.313, 7.15311, 99.6224, 24.7215, -10.6054, 10.5577, -13.2711, -13.0274, -15.0738, 1.07196, 108.984, -65.1426, -10, 104.12, Average Score: 4373.8 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 27.5043, -48.9562, -8.58057, -4.50474, -53.6636, 84.1748, 9.80886, 52.6324, -43.5529, -3.0695, 77.4848, 29.6249, -20.3302, -87.3658, -3.14802, -3, 25.0824, 3.15555, 30.6733, -68.0699, 3.05092, 116.916, 24.1679, 0, -83.7904, -8.69531, -11.2597, -0.387816, 4.29312, -9.7185, -39.231, 3.58975, 102.087, 17.2637, -37.5009, 54.4331, -1.51906, -26.6873, 71.9488, 5.65719, -11.345, 52.2804, 2.95364, 119.36, 0, -30.7137, 56.8924, -13.2215, -18.9379, 107.014, -4.14261, 1.04595, 52.2739, 7.27016, 98.9644, 25.7898, -9.50235, 1.22396, -14.7364, -17.3636, -15.5303, 4.63842, 115.225, -68.8163, -10, 110.389, trial: 0, score: 4637 trial: 1, score: 4733 trial: 2, score: 3229 trial: 3, score: 4509 trial: 4, score: 4862 Policy 1: 27.5043, -48.9562, -8.58057, -4.50474, -53.6636, 84.1748, 9.80886, 52.6324, -43.5529, -3.0695, 77.4848, 29.6249, -20.3302, -87.3658, -3.14802, -3, 25.0824, 3.15555, 30.6733, -68.0699, 3.05092, 116.916, 24.1679, 0, -83.7904, -8.69531, -11.2597, -0.387816, 4.29312, -9.7185, -39.231, 3.58975, 102.087, 17.2637, -37.5009, 54.4331, -1.51906, -26.6873, 71.9488, 5.65719, -11.345, 52.2804, 2.95364, 119.36, 0, -30.7137, 56.8924, -13.2215, -18.9379, 107.014, -4.14261, 1.04595, 52.2739, 7.27016, 98.9644, 25.7898, -9.50235, 1.22396, -14.7364, -17.3636, -15.5303, 4.63842, 115.225, -68.8163, -10, 110.389, Average Score: 4394 --------------------------------- New Iteration Current Best Policy: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Current Best Policy Score: 5123.8 Policy 0 will be: 24.344, -47.4839, -3.70496, -4.69269, -53.8449, 83.0811, 10.4333, 54.5972, -48.375, -6.87819, 77.0718, 26.1434, -18.8483, -88, -1.28784, -3.70408, 20.9843, 4.46749, 32.0497, -64.6041, 7.11151, 118.334, 24.5257, -0.663021, -87.0323, -7.88955, -9.42374, -3.41968, 5.16411, -12.6524, -41.1155, 1.34013, 105.869, 14.3667, -37.9729, 55.6004, -3.52831, -23.7132, 72.3329, 3.24982, -11.8275, 56.7936, 0.0308476, 122, 1.31957, -29.2837, 54.2799, -15, -15.0727, 108.705, -4.97844, 0.0163741, 47.9327, 4.41657, 98.4882, 29.6368, -12.4431, 5.70258, -15, -15.7567, -17.248, 5.12743, 111.741, -68.3712, -10, 108.891, Policy 1 will be: 22.5401, -47.6917, -3.97614, -1.69778, -55, 86.093, 6.95821, 51.2349, -46.4977, -10, 82.0669, 25.7429, -21.3387, -86.9928, 2.69784, -3, 18.6021, 1.22439, 31.0339, -63.3637, 2.15121, 120.919, 19.7403, -2.4634, -88, -12.4329, -13.1184, 1.1738, 1.55053, -14.1255, -44.7241, 2.84336, 101.769, 9.78476, -37.7115, 60.1532, 0.436327, -23.8944, 71.476, 7.44903, -7.46625, 58.3682, 0.229936, 117.592, 4.44922, -29.1322, 51.6657, -15, -12.7242, 106.149, -5.34068, -0.600245, 43.8969, 0.0927831, 100.378, 28.3962, -10.0274, 7.20111, -15, -17.0296, -19.3605, 6.34055, 111.807, -64.2746, -8.03191, 104.985,