| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | --- | 
					
						
							|  |  |  | id: 5e8f2f13c4cdbe86b5c72da5 | 
					
						
							| 
									
										
										
										
											2021-02-06 04:42:36 +00:00
										 |  |  | title: 'Reinforcement Learning With Q-Learning: Example' | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | challengeType: 11 | 
					
						
							|  |  |  | videoId: RBBSNta234s | 
					
						
							| 
									
										
										
										
											2021-01-13 03:31:00 +01:00
										 |  |  | dashedName: reinforcement-learning-with-q-learning-example | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | --- | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | # --question--
 | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | ## --text--
 | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | Fill in the blanks (`__A__`, `__B__`, and `__C__`) to complete the following Q-Learning update equation: | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | ```py | 
					
						
							|  |  |  | Q[__A__, __B__] = Q[__A__, __B__] + LEARNING_RATE * (reward + GAMMA * np.max(Q[__C__, :]) - Q[__A__, __B__]) | 
					
						
							|  |  |  | ``` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## --answers--
 | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | A: `state` | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | B: `action` | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | C: `next_state` | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | --- | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | A: `state` | 
					
						
							| 
									
										
										
										
											2020-08-13 12:00:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 00:37:30 -07:00
										 |  |  | B: `action` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | C: `prev_state` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | --- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A: `state` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | B: `reaction` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | C: `next_state` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## --video-solution--
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 1 | 
					
						
							|  |  |  | 
 |