| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | --- | 
					
						
							|  |  |  | id: 5e8f2f13c4cdbe86b5c72da5 | 
					
						
							| 
									
										
										
										
											2020-04-24 05:52:42 -05:00
										 |  |  | title: 'Reinforcement Learning With Q-Learning: Example' | 
					
						
							| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | challengeType: 11 | 
					
						
							|  |  |  | videoId: RBBSNta234s | 
					
						
							| 
									
										
										
										
											2021-10-01 12:24:12 +08:00
										 |  |  | bilibiliIds: | 
					
						
							|  |  |  |   aid: 848073871 | 
					
						
							|  |  |  |   bvid: BV1uL4y187Eq | 
					
						
							|  |  |  |   cid: 409139471 | 
					
						
							| 
									
										
										
										
											2021-01-13 03:31:00 +01:00
										 |  |  | dashedName: reinforcement-learning-with-q-learning-example | 
					
						
							| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | --- | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | # --question--
 | 
					
						
							| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | ## --text--
 | 
					
						
							| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | Fill in the blanks to complete the following Q-Learning equation: | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | ```py | 
					
						
							|  |  |  | Q[__A__, __B__] = Q[__A__, __B__] + LEARNING_RATE * (reward + GAMMA * np.max(Q[__C__, :]) - Q[__A__, __B__]) | 
					
						
							|  |  |  | ``` | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | ## --answers--
 | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | A: `state` | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | B: `action` | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | C: `next_state` | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | --- | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | A: `state` | 
					
						
							| 
									
										
										
										
											2020-05-28 22:40:36 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | B: `action` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | C: `prev_state` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | --- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A: `state` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | B: `reaction` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | C: `next_state` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## --video-solution--
 | 
					
						
							| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-27 19:02:05 +01:00
										 |  |  | 1 | 
					
						
							| 
									
										
										
										
											2020-04-21 11:19:42 -04:00
										 |  |  | 
 |