tese_spy.aux

\relax 
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax 
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{abnt-url-package=hyperref}
\citation{abnt-emphasize=bf}
\citation{abnt-etal-cite=2}
\citation{abnt-etal-list=0}
\citation{abnt-etal-text=it}
\select@language{english}
\@writefile{toc}{\select@language{english}}
\@writefile{lof}{\select@language{english}}
\@writefile{lot}{\select@language{english}}
\gdef \LT@i {\LT@entry 
    {2}{59.75963pt}\LT@entry 
    {1}{270.28549pt}}
\gdef \LT@ii {\LT@entry 
    {1}{154.24649pt}\LT@entry 
    {2}{298.3846pt}}
\citation{AIapplications}
\citation{RLNature2015}
\citation{AlphaGoZero}
\@writefile{toc}{\contentsline {chapter}{\numberline {1}Introduction}{14}{chapter.1}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\newlabel{chap:introduction}{{1}{14}{Introduction}{chapter.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {1.1}Motivation}{14}{section.1.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces DeepMind recent achievements.\relax }}{15}{figure.caption.8}}
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:deepmind_examples}{{1.1}{15}{DeepMind recent achievements.\relax }{figure.caption.8}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces Simulated humanoid agent movements and AI.\relax }}{16}{figure.caption.9}}
\newlabel{fig:humanoid_AI}{{1.2}{16}{Simulated humanoid agent movements and AI.\relax }{figure.caption.9}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces Robocup symbol and Soccer 3D Simulation league match.\relax }}{16}{figure.caption.10}}
\newlabel{fig:robocup}{{1.3}{16}{Robocup symbol and Soccer 3D Simulation league match.\relax }{figure.caption.10}{}}
\@writefile{toc}{\contentsline {section}{\numberline {1.2}Problem Statement}{16}{section.1.2}}
\citation{deepmimic}
\citation{TDLearning}
\citation{QLearning}
\citation{REINFORCE}
\citation{DDPG}
\citation{TRPO}
\citation{PPO}
\citation{RLNature2015}
\citation{ReplayBuffer}
\citation{RLNature2015}
\@writefile{toc}{\contentsline {section}{\numberline {1.3}Approach}{17}{section.1.3}}
\@writefile{toc}{\contentsline {section}{\numberline {1.4}Literature Review}{17}{section.1.4}}
\citation{deepmind1}
\citation{BengioCurrLearning}
\citation{deepmind2}
\citation{deepmind3}
\citation{gail}
\citation{deepmimic}
\citation{abbas}
\citation{abbas}
\citation{deepmimic}
\citation{TGMuzio}
\@writefile{toc}{\contentsline {section}{\numberline {1.5}Contributions}{19}{section.1.5}}
\@writefile{toc}{\contentsline {section}{\numberline {1.6}Outline of this Dissertation}{19}{section.1.6}}
\citation{Sutton1998}
\@writefile{toc}{\contentsline {chapter}{\numberline {2}Reinforcement Learning}{20}{chapter.2}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\newlabel{chap:rl}{{2}{20}{Reinforcement Learning}{chapter.2}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2.1}Model Introduction}{20}{section.2.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces Agent interacting with environment.\relax }}{20}{figure.caption.11}}
\newlabel{fig:RL_basic_model}{{2.1}{20}{Agent interacting with environment.\relax }{figure.caption.11}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2.2}Markov Decision Processes}{21}{section.2.2}}
\newlabel{eq:markov_basic}{{2.1}{21}{Markov Decision Processes}{equation.2.2.1}{}}
\newlabel{eq:state_value_function_definition}{{2.4}{22}{Markov Decision Processes}{equation.2.2.4}{}}
\newlabel{eq:action_value_function_definition}{{2.5}{22}{Markov Decision Processes}{equation.2.2.5}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Optimality in Reinforcement Learning}{22}{subsection.2.2.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1.1}Optimal Value Function}{22}{subsubsection.2.2.1.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1.2}Optimal Policy}{22}{subsubsection.2.2.1.2}}
\newlabel{eq:policy_ordering}{{2.6}{22}{Optimal Policy}{equation.2.2.6}{}}
\citation{lecture1DS}
\newlabel{eq:policy_from_greedy}{{2.7}{23}{Optimal Policy}{equation.2.2.7}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2.3}RL Algorithms}{23}{section.2.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.1}Categorizing RL}{23}{subsection.2.3.1}}
\citation{Sutton1998}
\@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces Classical division among RL algorithms classifications.\relax }}{24}{figure.caption.12}}
\newlabel{fig:categorizing_RL}{{2.2}{24}{Classical division among RL algorithms classifications.\relax }{figure.caption.12}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.2}Value Function Methods}{24}{subsection.2.3.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.2.1}Monte Carlo Methods}{24}{subsubsection.2.3.2.1}}
\citation{Sutton1998}
\citation{TDLearning}
\newlabel{eq:epsilon-greedy}{{2.8}{25}{Monte Carlo Methods}{equation.2.3.8}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.2.2}Temporal-Difference and Sarsa}{25}{subsubsection.2.3.2.2}}
\newlabel{eq:TD0_update}{{2.9}{25}{Temporal-Difference and Sarsa}{equation.2.3.9}{}}
\citation{Sutton1998}
\citation{QLearning}
\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Sarsa algorithm\relax }}{26}{algocf.1}}
\newlabel{algo:sarsa}{{1}{26}{Temporal-Difference and Sarsa}{algocf.1}{}}
\newlabel{eq:sarsa_lambda}{{2.11}{26}{Temporal-Difference and Sarsa}{equation.2.3.11}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.2.3}Q-Learning}{26}{subsubsection.2.3.2.3}}
\newlabel{eq:importance_sampling}{{2.12}{26}{Q-Learning}{equation.2.3.12}{}}
\newlabel{eq:qlearning_update}{{2.13}{27}{Q-Learning}{equation.2.3.13}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.3}Policy Search Methods}{27}{subsection.2.3.3}}
\newlabel{eq:tau_def}{{2.14}{27}{Policy Search Methods}{equation.2.3.14}{}}
\newlabel{eq:theta_max}{{2.15}{27}{Policy Search Methods}{equation.2.3.15}{}}
\newlabel{eq:rewrite_objective}{{2.16}{27}{Policy Search Methods}{equation.2.3.16}{}}
\citation{REINFORCE}
\newlabel{eq:grad_with_scorefunc}{{2.17}{28}{Policy Search Methods}{equation.2.3.17}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces REINFORCE algorithm\relax }}{28}{algocf.2}}
\newlabel{algo:reinforce}{{2}{28}{Policy Search Methods}{algocf.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.4}Actor-Critic}{28}{subsection.2.3.4}}
\newlabel{eq:actor_critic_gradient}{{2.18}{28}{Actor-Critic}{equation.2.3.18}{}}
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Deep Reinforcement Learning}{29}{chapter.3}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\newlabel{chap:deep_rl}{{3}{29}{Deep Reinforcement Learning}{chapter.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3.1}Neural Networks}{29}{section.3.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.1}Representation}{29}{subsection.3.1.1}}
\newlabel{eq:common_activation_functions}{{3.1}{30}{Representation}{equation.3.1.1}{}}
\newlabel{eq:linear_update_nn}{{3.2}{30}{Representation}{equation.3.1.2}{}}
\newlabel{eq:non_linear_update_nn}{{3.3}{30}{Representation}{equation.3.1.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces Shallow Neural Network Architecture.\relax }}{30}{figure.caption.13}}
\newlabel{fig:nn_basic_architecture}{{3.1}{30}{Shallow Neural Network Architecture.\relax }{figure.caption.13}{}}
\newlabel{eq:quadratic_cost}{{3.4}{31}{Representation}{equation.3.1.4}{}}
\newlabel{eq:cross_entropy_cost}{{3.5}{31}{Representation}{equation.3.1.5}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.2}Vectorization}{31}{subsection.3.1.2}}
\newlabel{eq:vectorization_concatenation_X}{{3.6}{31}{Vectorization}{equation.3.1.6}{}}
\newlabel{eq:vectorization_concatenation}{{3.7}{31}{Vectorization}{equation.3.1.7}{}}
\newlabel{eq:linear_update_nn_vectorized}{{3.8}{32}{Vectorization}{equation.3.1.8}{}}
\newlabel{eq:non_linear_update_nn_vectorized}{{3.9}{32}{Vectorization}{equation.3.1.9}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.3}Forward Propagation}{32}{subsection.3.1.3}}
\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Forward Propagation\relax }}{32}{algocf.3}}
\newlabel{algo:forward_propagation}{{3}{32}{Forward Propagation}{algocf.3}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1.4}Backward Propagation}{32}{subsection.3.1.4}}
\newlabel{eq:partial_derivatives_notation_first}{{3.10}{32}{Backward Propagation}{equation.3.1.10}{}}
\newlabel{eq:partial_derivatives_notation_final}{{3.13}{32}{Backward Propagation}{equation.3.1.13}{}}
\citation{TRPO}
\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Backward Propagation\relax }}{33}{algocf.4}}
\newlabel{algo:backward_propagation}{{4}{33}{Backward Propagation}{algocf.4}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {5}{\ignorespaces Gradient Descent\relax }}{33}{algocf.5}}
\newlabel{algo:gradient_descent}{{5}{33}{Backward Propagation}{algocf.5}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3.2}Trust Region Policy Optimization}{33}{section.3.2}}
\newlabel{eq:KL_divergence}{{3.14}{34}{Trust Region Policy Optimization}{equation.3.2.14}{}}
\newlabel{eq:TRPO_1}{{3.15}{34}{Trust Region Policy Optimization}{equation.3.2.15}{}}
\newlabel{eq:TRPO_2}{{3.16}{34}{Trust Region Policy Optimization}{equation.3.2.16}{}}
\citation{PPO}
\citation{PPO}
\@writefile{loa}{\contentsline {algocf}{\numberline {6}{\ignorespaces TRPO\relax }}{35}{algocf.6}}
\newlabel{algo:TRPO}{{6}{35}{Trust Region Policy Optimization}{algocf.6}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3.3}Proximal Policy Optimization (PPO)}{35}{section.3.3}}
\newlabel{eq:def_r_t}{{3.17}{35}{Proximal Policy Optimization (PPO)}{equation.3.3.17}{}}
\newlabel{eq:PPO_objective_function}{{3.18}{35}{Proximal Policy Optimization (PPO)}{equation.3.3.18}{}}
\bibdata{abnt-options,Referencias/referencias}
\newlabel{eq:clip_function}{{3.19}{36}{Proximal Policy Optimization (PPO)}{equation.3.3.19}{}}
\bibcite{abbas}{Abdolmaleki \textit  {et al.} 2016}
\bibciteEXPL{abbas}{Abdolmaleki \textit  {et al.}}
\bibciteIMPL{abbas}{ABDOLMALEKI \textit  {et al.}}
\bibciteYEAR{abbas}{2016}
\bibcite{TDLearning}{Barto \textit  {et al.} 1983}
\bibciteEXPL{TDLearning}{Barto \textit  {et al.}}
\bibciteIMPL{TDLearning}{BARTO \textit  {et al.}}
\bibciteYEAR{TDLearning}{1983}
\bibcite{BengioCurrLearning}{Bengio \textit  {et al.} 2009}
\bibciteEXPL{BengioCurrLearning}{Bengio \textit  {et al.}}
\bibciteIMPL{BengioCurrLearning}{BENGIO \textit  {et al.}}
\bibciteYEAR{BengioCurrLearning}{2009}
\bibcite{deepmind1}{Heess \textit  {et al.} 2017}
\bibciteEXPL{deepmind1}{Heess \textit  {et al.}}
\bibciteIMPL{deepmind1}{HEESS \textit  {et al.}}
\bibciteYEAR{deepmind1}{2017}
\bibcite{gail}{Ho e Ermon 2016}
\bibciteEXPL{gail}{Ho e Ermon}
\bibciteIMPL{gail}{HO; ERMON}
\bibciteYEAR{gail}{2016}
\bibcite{DDPG}{Lillicrap \textit  {et al.} 2015}
\bibciteEXPL{DDPG}{Lillicrap \textit  {et al.}}
\bibciteIMPL{DDPG}{LILLICRAP \textit  {et al.}}
\bibciteYEAR{DDPG}{2015}
\bibcite{ReplayBuffer}{Lin 1992}
\bibciteEXPL{ReplayBuffer}{Lin}
\bibciteIMPL{ReplayBuffer}{LIN}
\bibciteYEAR{ReplayBuffer}{1992}
\bibcite{AIapplications}{Marr}
\bibciteEXPL{AIapplications}{Marr}
\bibciteIMPL{AIapplications}{MARR}
\bibciteYEAR{AIapplications}{}
\bibcite{deepmind2}{Merel \textit  {et al.} 2017}
\bibciteEXPL{deepmind2}{Merel \textit  {et al.}}
\bibciteIMPL{deepmind2}{MEREL \textit  {et al.}}
\bibciteYEAR{deepmind2}{2017}
\bibcite{RLNature2015}{Mnih \textit  {et al.} 2015}
\bibciteEXPL{RLNature2015}{Mnih \textit  {et al.}}
\bibciteIMPL{RLNature2015}{MNIH \textit  {et al.}}
\bibciteYEAR{RLNature2015}{2015}
\@writefile{toc}{\contentsline {chapter}{Bibliography}{37}{chapter.4}}
\bibcite{TGMuzio}{Muzio \textit  {et al.} 2017}
\bibciteEXPL{TGMuzio}{Muzio \textit  {et al.}}
\bibciteIMPL{TGMuzio}{MUZIO \textit  {et al.}}
\bibciteYEAR{TGMuzio}{2017}
\bibcite{deepmimic}{Peng \textit  {et al.} 2018}
\bibciteEXPL{deepmimic}{Peng \textit  {et al.}}
\bibciteIMPL{deepmimic}{PENG \textit  {et al.}}
\bibciteYEAR{deepmimic}{2018}
\bibcite{TRPO}{Schulman \textit  {et al.} 2015}
\bibciteEXPL{TRPO}{Schulman \textit  {et al.}}
\bibciteIMPL{TRPO}{SCHULMAN \textit  {et al.}}
\bibciteYEAR{TRPO}{2015}
\bibcite{PPO}{Schulman \textit  {et al.} 2017}
\bibciteEXPL{PPO}{Schulman \textit  {et al.}}
\bibciteIMPL{PPO}{SCHULMAN \textit  {et al.}}
\bibciteYEAR{PPO}{2017}
\bibcite{lecture1DS}{Silver}
\bibciteEXPL{lecture1DS}{Silver}
\bibciteIMPL{lecture1DS}{SILVER}
\bibciteYEAR{lecture1DS}{}
\bibcite{AlphaGoZero}{Silver \textit  {et al.} 2017}
\bibciteEXPL{AlphaGoZero}{Silver \textit  {et al.}}
\bibciteIMPL{AlphaGoZero}{SILVER \textit  {et al.}}
\bibciteYEAR{AlphaGoZero}{2017}
\bibcite{Sutton1998}{Sutton e Barto 1998}
\bibciteEXPL{Sutton1998}{Sutton e Barto}
\bibciteIMPL{Sutton1998}{SUTTON; BARTO}
\bibciteYEAR{Sutton1998}{1998}
\bibcite{deepmind3}{Wang \textit  {et al.} 2017}
\bibciteEXPL{deepmind3}{Wang \textit  {et al.}}
\bibciteIMPL{deepmind3}{WANG \textit  {et al.}}
\bibciteYEAR{deepmind3}{2017}
\bibcite{QLearning}{Watkins 1989}
\bibciteEXPL{QLearning}{Watkins}
\bibciteIMPL{QLearning}{WATKINS}
\bibciteYEAR{QLearning}{1989}
\bibcite{REINFORCE}{Wiliams 1992}
\bibciteEXPL{REINFORCE}{Wiliams}
\bibciteIMPL{REINFORCE}{WILIAMS}
\bibciteYEAR{REINFORCE}{1992}
\newlabel{LastPage}{{}{39}{}{page.39}{}}
\xdef\lastpage@lastpage{39}
\xdef\lastpage@lastpageHy{39}
\bibstyle{abnt-alf}