Ajout d'une bibliographie. Nombreux ajouts :

- Algorithme UCB - Prédiction convexe avec info totale
author: Thibaut Horel <thibaut.horel@gmail.com> 2010-06-22 01:16:14 +0200
committer: Thibaut Horel <thibaut.horel@gmail.com> 2010-06-22 01:16:14 +0200
commit: 71b1a44313019a7c58e038d6bb6d2b0aba44ec1f (patch)
tree: 3ffc0edc6b6cbb398fe79aecb9d8c2fb91a92dc9
parent: 5a3533fa3f2fb02fd3cda2b546549a61b8d0407b (diff)
download: bandits-71b1a44313019a7c58e038d6bb6d2b0aba44ec1f.tar.gz
4 files changed, 1906 insertions, 181 deletions
diff --git a/biblio.bib b/biblio.bib
new file mode 100644
index 0000000..a8ecc26
--- /dev/null
+++ b/biblio.bib
@@ -0,0 +1,95 @@
+@book{plg,
+  title="Prediction, learning, and games",
+  author="Cesa-Bianchi, Nicoló and Lugosi, Gàbor",
+  year="2006",
+  publisher="Cambridge University Press"
+}
+
+@conference{zink,
+  title="Online Convex Programming and Generalized Infinitesimal
+Gradient Ascent",
+  author="Zinkevich, Martin",
+  booktitle="Proceedings of The 20th International Conference
+on Machine Learning",
+  year="2003"
+}
+
+@conference{linband,
+  title="Competing in the Dark: An Efficient Algorithm for Bandit Linear
+Optimization ",
+  author="Jacob Abernethy and Elad Hazan and Alexander Rakhlin",
+  booktitle="Proceedings of The 21st Annual Conference on
+Learning Theory",
+  year="2008",
+  month="Juillet",
+}
+
+@unpublished{exp4,
+  title="An Optimal High Probability Algorithm for the Contextual Bandit
+Problem",
+  author="Alina Beygelzimer and John Langford and Lihong Li and Lev Reyzin and
+Robert E. Schapire",
+  note="Publié sur arXiv",
+  year="2010"
+}
+
+@unpublished{audbu,
+  title="Minimax Policies for Bandits Games",
+  author="Jean-Yves Audibert and Sébastien Bubeck",
+  year="2010",
+  note="À paraître"
+}
+
+@article{bandsto,
+  title="Finite time analysis of the multiarmed bandit problem",
+  author="Peter Auer and Cesa-Bianchi, Nicoló and Paul Fischer",
+  journal="Machine Learning",
+  year="2002",
+  volume="47",
+  pages="235--256",
+  number="2/3"
+}
+
+@article{bandet,
+  title="The nonstochastic multiarmed bandit problem",
+  author="Peter Auer and Nicoló Cesa-Bianchi and Yoav Freund and Robert E.
+Schapire",
+  journal="SIAM Journal on Computing",
+  volume="32",
+  number="1",
+  pages="48--77",
+  year="2002"
+}
+
+@conference{multi,
+  title="Optimal Algorithms for Online Convex Optimization with Multi-Point
+Bandit Feedback",
+  author="Alekh Agarwal and Ofer Dekel and Lin Xiao",
+  booktitle="Proceedings of The 23rd Annual Conference on
+Learning Theory",
+  year="2010"
+}
+
+@article{label,
+  title="Minimizing regret with label efficient prediction",
+  author="Cesa-Bianchi, Nicoló and Lugosi, Gàbor and Gilles Stoltz",
+  journal="IEEE Transactions on Information Theory",
+  volume="51",
+  number="6",
+  year="2005",
+  pages="2152--2162"
+}
+
+@book{massart,
+  title="Concentration Inequalities and Model Selection",
+  author="Pascal Massart",
+  series="Lecture Notes in Mathematics",
+  publisher="Springer",
+  note="École d'Eté de Probabilités de Saint-Flour XXXIII -- 2003",
+  year="2007"
+}
+
+
+
+
+
diff --git a/plain-fr.bst b/plain-fr.bst
new file mode 100755
index 0000000..a47e102
--- /dev/null
+++ b/plain-fr.bst
@@ -0,0 +1,1235 @@
+%%
+%% Bib. style "plain-fr" (version francisee de plain.bst)
+%%
+%% NM, 2004/02/22
+%% markey@lsv.ens-cachan.fr 
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Original Copyright (Oren Patashnik):
+%%
+ % This program may be distributed and/or modified under the
+ % conditions of the LaTeX Project Public License, either version 1.2
+ % of this license or (at your option) any later version.
+ % The latest version of this license is in
+ %   http://www.latex-project.org/lppl.txt
+ % and version 1.2 or later is part of all distributions of LaTeX
+ % version 1999/12/01 or later.
+ %
+ % This program consists of the files plain.bst
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+ENTRY
+  { address
+    author
+    booktitle
+    chapter
+    edition
+    editor
+    howpublished
+    institution
+    journal
+    key
+    month
+    note
+    number
+    organization
+    pages
+    publisher
+    school
+    series
+    title
+    type
+    volume
+    year
+  }
+  {}
+  { label }
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% C'est ici que je definis les "traductions". Normalement, y a 
+%% que ca a changer pour franciser le style...
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+FUNCTION{fr.and}{		%% le "et" entre les deux derniers auteurs
+" et "
+}
+
+FUNCTION{fr.editeur}{		%% ", \'editeur" au singulier, ou " (\'editeur)"...
+", \'editeur"
+}
+
+FUNCTION{fr.editeurs}{		%% ", \'editeurs" au pluriel, ou " (\'editeurs)"...
+", \'editeurs"
+}
+
+FUNCTION{fr.et.al}{		%% " et~al."
+" \emph{et~al.}"
+}
+
+FUNCTION{fr.in}{		%% "Dans "
+"\emph{In} "
+}
+
+FUNCTION{fr.in.spc}{		%% " de " (ou " de la s\'erie ")
+" de "
+}
+
+FUNCTION{fr.of}{		%% " de "
+" de "
+}
+
+FUNCTION{fr.number}{		%% "num{\'e}ro "
+"num\'ero "
+}
+
+FUNCTION{fr.number.maj}{	%% "Num\'ero "
+"Num\'ero "
+}
+
+FUNCTION{fr.volume}{		%% "volume"
+"volume"
+}
+
+FUNCTION{fr.volume.maj}{	%% "Volume"
+"Volume"
+}
+
+FUNCTION{fr.edition}{		%% " \'edition"
+" \'edition"
+}
+
+FUNCTION{fr.pages}{		%% "pages"
+"pages"
+}
+
+FUNCTION{fr.page}{		%% "page"
+"page"
+}
+
+FUNCTION{fr.chapter}{		%% "chapitre"
+"chapitre"
+}
+
+FUNCTION{fr.tech.rep}{		%% "Rapport Technique"
+"Rapport technique"
+}
+
+FUNCTION{fr.master}{		%% "M\'emoire de D.E.A."
+"M\'emoire de D.E.A."
+}
+
+FUNCTION{fr.phd}{		%% "Th\`ese de doctorat"
+"Th\`ese de doctorat"
+}
+
+FUNCTION{fr.auteurs.style}{	
+	%% si on veut des small caps sur le LASTNAME de l'auteur
+	%% Cette fonction est utilisee dans la definition d'une
+	%% fonction LaTeX.
+"\scshape"
+}
+
+FUNCTION{fr.ponctuation.apres.auteurs}{ %% Comme son nom l'indique...
+					%% Laisser vide pour un "."
+" : "
+}
+
+FUNCTION{fr.deuxpoints}{        %% Pour eviter l'espace avant les ":".
+"\string:\penalty500\relax "
+}
+
+MACRO {jan} {"janvier"}
+MACRO {feb} {"f\'evrier"}
+MACRO {mar} {"mars"}
+MACRO {apr} {"avril"}
+MACRO {may} {"mai"}
+MACRO {jun} {"juin"}
+MACRO {jul} {"juillet"}
+MACRO {aug} {"ao\^ut"}
+MACRO {sep} {"septembre"}
+MACRO {oct} {"octobre"}
+MACRO {nov} {"novembre"}
+MACRO {dec} {"d\'ecembre"}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% La suite, normalement, y a rien a changer...
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+
+
+
+INTEGERS { output.state before.all mid.sentence after.sentence after.block after.authors }
+
+FUNCTION {init.state.consts}
+{ #0 'before.all :=
+  #1 'mid.sentence :=
+  #2 'after.sentence :=
+  #3 'after.block :=
+  #0 'after.authors :=
+}
+
+STRINGS { s t }
+
+FUNCTION{ fr.add.period }{
+after.authors 
+  { fr.ponctuation.apres.auteurs empty$
+      { add.period$ }
+      { fr.ponctuation.apres.auteurs * #0 'after.authors := }
+    if$}
+  { add.period$ }
+if$
+}
+
+FUNCTION {output.nonnull}
+{ 's :=
+  output.state mid.sentence =
+    { ", " * write$ }
+    { output.state after.block =
+	{ fr.add.period write$
+	  newline$
+	  "\newblock " write$
+	}
+	{ output.state before.all =
+	    'write$
+	    { add.period$ " " * write$ }
+	  if$
+	}
+      if$
+      mid.sentence 'output.state :=
+    }
+  if$
+  s
+}
+
+FUNCTION {output}
+{ duplicate$ empty$
+    'pop$
+    'output.nonnull
+  if$
+}
+
+FUNCTION {output.check}
+{ 't :=
+  duplicate$ empty$
+    { pop$ "empty " t * " in " * cite$ * warning$ }
+    'output.nonnull
+  if$
+}
+
+FUNCTION {output.bibitem}
+{ newline$
+  "\bibitem{" write$
+  cite$ write$
+  "}" write$
+  newline$
+  ""
+  before.all 'output.state :=
+}
+
+FUNCTION {fin.entry}
+{ add.period$
+  write$
+  newline$
+}
+
+FUNCTION {new.block}
+{ output.state before.all =
+    'skip$
+    { after.block 'output.state := }
+  if$
+}
+
+FUNCTION {new.sentence}
+{ output.state after.block =
+    'skip$
+    { output.state before.all =
+	'skip$
+	{ after.sentence 'output.state := }
+      if$
+    }
+  if$
+}
+
+FUNCTION {not}
+{   { #0 }
+    { #1 }
+  if$
+}
+
+FUNCTION {and}
+{   'skip$
+    { pop$ #0 }
+  if$
+}
+
+FUNCTION {or}
+{   { pop$ #1 }
+    'skip$
+  if$
+}
+
+FUNCTION {new.block.checka}
+{ empty$
+    'skip$
+    'new.block
+  if$
+}
+
+FUNCTION {new.block.checkb}
+{ empty$
+  swap$ empty$
+  and
+    'skip$
+    'new.block
+  if$
+}
+
+FUNCTION {new.sentence.checka}
+{ empty$
+    'skip$
+    'new.sentence
+  if$
+}
+
+FUNCTION {new.sentence.checkb}
+{ empty$
+  swap$ empty$
+  and
+    'skip$
+    'new.sentence
+  if$
+}
+
+FUNCTION {field.or.null}
+{ duplicate$ empty$
+    { pop$ "" }
+    'skip$
+  if$
+}
+
+FUNCTION {emphasize}
+{ duplicate$ empty$
+    { pop$ "" }
+    { "{\em " swap$ * "}" * }
+  if$
+}
+
+INTEGERS { nameptr namesleft numnames }
+
+FUNCTION {format.names}
+{ 's :=
+  #1 'nameptr :=
+  s num.names$ 'numnames :=
+  numnames 'namesleft :=
+    { namesleft #0 > }
+    { s nameptr "{ff~}{vv~}\bgroup\fonteauteurs\bgroup {ll}\egroup\egroup{{}}{, jj}" format.name$ 't :=
+      nameptr #1 >
+	{ namesleft #1 >
+	    { ", " * t * }
+	    {   %numnames #2 > namesleft #2 > and
+		%  { "," * }
+		%  'skip$
+	        %if$
+	      t "\bgroup\fonteauteurs\bgroup others\egroup\egroup{}" =
+		{ fr.et.al * }
+		{ fr.and * t * }
+	      if$
+	    }
+	  if$
+	}
+	't
+      if$
+      nameptr #1 + 'nameptr :=
+      namesleft #1 - 'namesleft :=
+    }
+  while$
+}
+
+FUNCTION {format.authors}
+{ author empty$
+    { "" }
+    { author format.names #1 'after.authors := }
+  if$
+}
+
+FUNCTION {format.editors}
+{ editor empty$
+    { "" }
+    { editor format.names
+      editor num.names$ #1 >
+	{ fr.editeurs * }
+	{ fr.editeur * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.title}
+{ title empty$
+    { "" }
+    { title "t" change.case$ }
+  if$
+}
+
+FUNCTION {n.dashify}
+{ 't :=
+  ""
+    { t empty$ not }
+    { t #1 #1 substring$ "-" =
+	{ t #1 #2 substring$ "--" = not
+	    { "--" *
+	      t #2 global.max$ substring$ 't :=
+	    }
+	    {   { t #1 #1 substring$ "-" = }
+		{ "-" *
+		  t #2 global.max$ substring$ 't :=
+		}
+	      while$
+	    }
+	  if$
+	}
+	{ t #1 #1 substring$ *
+	  t #2 global.max$ substring$ 't :=
+	}
+      if$
+    }
+  while$
+}
+
+FUNCTION {format.date}
+{ year empty$
+    { month empty$
+	{ "" }
+	{ "there's a month but no year in " cite$ * warning$
+	  month
+	}
+      if$
+    }
+    { month empty$
+	'year
+	{ month " " * year * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.btitle}
+{ title emphasize
+}
+
+FUNCTION {tie.or.space.connect}
+{ duplicate$ text.length$ #3 <
+    { "~" }
+    { " " }
+  if$
+  swap$ * *
+}
+
+FUNCTION {either.or.check}
+{ empty$
+    'pop$
+    { "can't use both " swap$ * " fields in " * cite$ * warning$ }
+  if$
+}
+
+FUNCTION {format.bvolume}
+{ volume empty$
+    { "" }
+    { fr.volume volume tie.or.space.connect
+      series empty$
+	'skip$
+	{ fr.of * series emphasize * }
+      if$
+      "volume and number" number either.or.check
+    }
+  if$
+}
+
+FUNCTION {format.number.series}
+{ volume empty$
+    { number empty$
+	{ series field.or.null }
+	{ output.state mid.sentence =
+	    { fr.number }
+	    { fr.number.maj }
+	  if$
+	  number tie.or.space.connect
+	  series empty$
+	    { "there's a number but no series in " cite$ * warning$ }
+	    { fr.in.spc * series * }
+	  if$
+	}
+      if$
+    }
+    { "" }
+  if$
+}
+
+FUNCTION {format.edition}
+{ edition empty$
+    { "" }
+    { output.state mid.sentence =
+	{ edition "l" change.case$ fr.edition * }
+	{ edition "t" change.case$ fr.edition * }
+      if$
+    }
+  if$
+}
+
+INTEGERS { multiresult }
+
+FUNCTION {multi.page.check}
+{ 't :=
+  #0 'multiresult :=
+    { multiresult not
+      t empty$ not
+      and
+    }
+    { t #1 #1 substring$
+      duplicate$ "-" =
+      swap$ duplicate$ "," =
+      swap$ "+" =
+      or or
+	{ #1 'multiresult := }
+	{ t #2 global.max$ substring$ 't := }
+      if$
+    }
+  while$
+  multiresult
+}
+
+FUNCTION {format.pages}
+{ pages empty$
+    { "" }
+    { pages multi.page.check
+	{ fr.pages pages n.dashify tie.or.space.connect }
+	{ fr.page pages tie.or.space.connect }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.vol.num.pages}
+{ volume field.or.null
+  number empty$
+    'skip$
+    { "(" number * ")" * *
+      volume empty$
+	{ "there's a number but no volume in " cite$ * warning$ }
+	'skip$
+      if$
+    }
+  if$
+  pages empty$
+    'skip$
+    { duplicate$ empty$
+	{ pop$ format.pages }
+	{ fr.deuxpoints * pages n.dashify * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.chapter.pages}
+{ chapter empty$
+    'format.pages
+    { type empty$
+	{ fr.chapter }
+	{ type "l" change.case$ }
+      if$
+      chapter tie.or.space.connect
+      pages empty$
+	'skip$
+	{ ", " * format.pages * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.in.ed.booktitle}
+{ booktitle empty$
+    { "" }
+    { editor empty$
+	{ fr.in booktitle emphasize * }
+	{ fr.in format.editors #1 'after.authors := fr.add.period " " * * booktitle emphasize * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {empty.misc.check}
+{ author empty$ title empty$ howpublished empty$
+  month empty$ year empty$ note empty$
+  and and and and and
+  key empty$ not and
+    { "all relevant fields are empty in " cite$ * warning$ }
+    'skip$
+  if$
+}
+
+FUNCTION {format.thesis.type}
+{ type empty$
+    'skip$
+    { pop$
+      type "t" change.case$
+    }
+  if$
+}
+
+FUNCTION {format.tr.number}
+{ type empty$
+    { fr.tech.rep }
+    'type
+  if$
+  number empty$
+    { "t" change.case$ }
+    { number tie.or.space.connect }
+  if$
+}
+
+FUNCTION {format.article.crossref}
+{ key empty$
+    { journal empty$
+	{ "need key or journal for " cite$ * " to crossref " * crossref *
+	  warning$
+	  ""
+	}
+	{ fr.in "{\em " * journal * "\/}" * }
+      if$
+    }
+    { fr.in key * }
+  if$
+  " \cite{" * crossref * "}" *
+}
+
+FUNCTION {format.crossref.editor}
+{ editor #1 "{vv~}\bgroup\fonteauteurs\bgroup {ll}\egroup\egroup{{}}" format.name$
+  editor num.names$ duplicate$
+  #2 >
+    { pop$ fr.et.al * }
+    { #2 <
+	'skip$
+	{ editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
+	    { fr.et.al * }
+	    { fr.and * editor #2 "{vv~}\bgroup\fonteauteurs\bgroup {ll}\egroup\egroup{{}}" format.name$ * }
+	  if$
+	}
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.book.crossref}
+{ volume empty$
+    { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
+      fr.in
+    }
+    { fr.volume.maj volume tie.or.space.connect
+      fr.of *
+    }
+  if$
+  editor empty$
+  editor field.or.null author field.or.null =
+  or
+    { key empty$
+	{ series empty$
+	    { "need editor, key, or series for " cite$ * " to crossref " *
+	      crossref * warning$
+	      "" *
+	    }
+	    { "{\em " * series * "\/}" * }
+	  if$
+	}
+	{ key * }
+      if$
+    }
+    { format.crossref.editor * }
+  if$
+  " \cite{" * crossref * "}" *
+}
+
+FUNCTION {format.incoll.inproc.crossref}
+{ editor empty$
+  editor field.or.null author field.or.null =
+  or
+    { key empty$
+	{ booktitle empty$
+	    { "need editor, key, or booktitle for " cite$ * " to crossref " *
+	      crossref * warning$
+	      ""
+	    }
+	    { fr.in "{\em " * booktitle * "\/}" * }
+	  if$
+	}
+	{ fr.in key * }
+      if$
+    }
+    { fr.in format.crossref.editor * }
+  if$
+  " \cite{" * crossref * "}" *
+}
+
+FUNCTION {article}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  crossref missing$
+    { journal emphasize "journal" output.check
+      format.vol.num.pages output
+      format.date "year" output.check
+    }
+    { format.article.crossref output.nonnull
+      format.pages output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {book}
+{ output.bibitem
+  author empty$
+    { format.editors "author and editor" output.check }
+    { format.authors output.nonnull
+      crossref missing$
+	{ "author and editor" editor either.or.check }
+	'skip$
+      if$
+    }
+  if$
+  new.block
+  format.btitle "title" output.check
+  crossref missing$
+    { format.bvolume output
+      new.block
+      format.number.series output
+      new.sentence
+      publisher "publisher" output.check
+      address output
+    }
+    { new.block
+      format.book.crossref output.nonnull
+    }
+  if$
+  format.edition output
+  format.date "year" output.check
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {booklet}
+{ output.bibitem
+  format.authors output
+  new.block
+  format.title "title" output.check
+  howpublished address new.block.checkb
+  howpublished output
+  address output
+  format.date output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {inbook}
+{ output.bibitem
+  author empty$
+    { format.editors "author and editor" output.check }
+    { format.authors output.nonnull
+      crossref missing$
+	{ "author and editor" editor either.or.check }
+	'skip$
+      if$
+    }
+  if$
+  new.block
+  format.btitle "title" output.check
+  crossref missing$
+    { format.bvolume output
+      format.chapter.pages "chapter and pages" output.check
+      new.block
+      format.number.series output
+      new.sentence
+      publisher "publisher" output.check
+      address output
+    }
+    { format.chapter.pages "chapter and pages" output.check
+      new.block
+      format.book.crossref output.nonnull
+    }
+  if$
+  format.edition output
+  format.date "year" output.check
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {incollection}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  crossref missing$
+    { format.in.ed.booktitle "booktitle" output.check
+      format.bvolume output
+      format.number.series output
+      format.chapter.pages output
+      new.sentence
+      publisher "publisher" output.check
+      address output
+      format.edition output
+      format.date "year" output.check
+    }
+    { format.incoll.inproc.crossref output.nonnull
+      format.chapter.pages output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {inproceedings}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  crossref missing$
+    { format.in.ed.booktitle "booktitle" output.check
+      format.bvolume output
+      format.number.series output
+      format.pages output
+      address empty$
+	{ organization publisher new.sentence.checkb
+	  organization output
+	  publisher output
+	  format.date "year" output.check
+	}
+	{ address output.nonnull
+	  format.date "year" output.check
+	  new.sentence
+	  organization output
+	  publisher output
+	}
+      if$
+    }
+    { format.incoll.inproc.crossref output.nonnull
+      format.pages output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {conference} { inproceedings }
+
+FUNCTION {manual}
+{ output.bibitem
+  author empty$
+    { organization empty$
+	'skip$
+	{ organization output.nonnull
+	  address output
+	}
+      if$
+    }
+    { format.authors output.nonnull }
+  if$
+  new.block
+  format.btitle "title" output.check
+  author empty$
+    { organization empty$
+	{ address new.block.checka
+	  address output
+	}
+	'skip$
+      if$
+    }
+    { organization address new.block.checkb
+      organization output
+      address output
+    }
+  if$
+  format.edition output
+  format.date output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {mastersthesis}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  fr.master format.thesis.type output.nonnull
+  school "school" output.check
+  address output
+  format.date "year" output.check
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {misc}
+{ output.bibitem
+  format.authors output
+  title howpublished new.block.checkb
+  format.title output
+  howpublished new.block.checka
+  howpublished output
+  format.date output
+  new.block
+  note output
+  fin.entry
+  empty.misc.check
+}
+
+FUNCTION {phdthesis}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.btitle "title" output.check
+  new.block
+  fr.phd format.thesis.type output.nonnull
+  school "school" output.check
+  address output
+  format.date "year" output.check
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {proceedings}
+{ output.bibitem
+  editor empty$
+    { organization output }
+    { format.editors output.nonnull }
+  if$
+  new.block
+  format.btitle "title" output.check
+  format.bvolume output
+  format.number.series output
+  address empty$
+    { editor empty$
+	{ publisher new.sentence.checka }
+	{ organization publisher new.sentence.checkb
+	  organization output
+	}
+      if$
+      publisher output
+      format.date "year" output.check
+    }
+    { address output.nonnull
+      format.date "year" output.check
+      new.sentence
+      editor empty$
+	'skip$
+	{ organization output }
+      if$
+      publisher output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {techreport}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  format.tr.number output.nonnull
+  institution "institution" output.check
+  address output
+  format.date "year" output.check
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {unpublished}
+{ output.bibitem
+  format.authors "author" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  note "note" output.check
+  format.date output
+  fin.entry
+}
+
+FUNCTION {default.type} { misc }
+
+MACRO {acmcs} {"ACM Computing Surveys"}
+MACRO {acta} {"Acta Informatica"}
+MACRO {cacm} {"Communications of the ACM"}
+MACRO {ibmjrd} {"IBM Journal of Research and Development"}
+MACRO {ibmsj} {"IBM Systems Journal"}
+MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
+MACRO {ieeetc} {"IEEE Transactions on Computers"}
+MACRO {ieeetcad}
+ {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
+MACRO {ipl} {"Information Processing Letters"}
+MACRO {jacm} {"Journal of the ACM"}
+MACRO {jcss} {"Journal of Computer and System Sciences"}
+MACRO {scp} {"Science of Computer Programming"}
+MACRO {sicomp} {"SIAM Journal on Computing"}
+MACRO {tocs} {"ACM Transactions on Computer Systems"}
+MACRO {tods} {"ACM Transactions on Database Systems"}
+MACRO {tog} {"ACM Transactions on Graphics"}
+MACRO {toms} {"ACM Transactions on Mathematical Software"}
+MACRO {toois} {"ACM Transactions on Office Information Systems"}
+MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
+MACRO {tcs} {"Theoretical Computer Science"}
+
+READ
+
+FUNCTION {sortify}
+{ purify$
+  "l" change.case$
+}
+
+INTEGERS { len }
+
+FUNCTION {chop.word}
+{ 's :=
+  'len :=
+  s #1 len substring$ =
+    { s len #1 + global.max$ substring$ }
+    's
+  if$
+}
+
+FUNCTION {sort.format.names}
+{ 's :=
+  #1 'nameptr :=
+  ""
+  s num.names$ 'numnames :=
+  numnames 'namesleft :=
+    { namesleft #0 > }
+    { nameptr #1 >
+        { "   " * }
+        'skip$
+      if$
+      s nameptr "{vv{ } }{ll{ }}{  ff{ }}{  jj{ }}" format.name$ 't :=
+      nameptr numnames = t "others" = and
+        { "et al" * }
+        { t sortify * }
+      if$
+      nameptr #1 + 'nameptr :=
+      namesleft #1 - 'namesleft :=
+    }
+  while$
+}
+
+FUNCTION {sort.format.title}
+{ 't :=
+  "Le " #3
+    "La " #3
+      "Les " #4
+        "Un " #3
+          "Une " #4
+            "Des " #4
+	      "A " #2
+		"An " #3
+		  "The " #4 t chop.word
+		chop.word
+	      chop.word
+	    chop.word
+	  chop.word
+	chop.word
+      chop.word
+    chop.word
+  chop.word
+  sortify
+  #1 global.max$ substring$
+}
+
+FUNCTION {author.sort}
+{ author empty$
+    { key empty$
+        { "to sort, need author or key in " cite$ * warning$
+          ""
+        }
+        { key sortify }
+      if$
+    }
+    { author sort.format.names }
+  if$
+}
+
+FUNCTION {author.editor.sort}
+{ author empty$
+    { editor empty$
+        { key empty$
+            { "to sort, need author, editor, or key in " cite$ * warning$
+              ""
+            }
+            { key sortify }
+          if$
+        }
+        { editor sort.format.names }
+      if$
+    }
+    { author sort.format.names }
+  if$
+}
+
+FUNCTION {author.organization.sort}
+{ author empty$
+    { organization empty$
+        { key empty$
+            { "to sort, need author, organization, or key in " cite$ * warning$
+              ""
+            }
+            { key sortify }
+          if$
+        }
+        { "Le " #3 
+	    "La " #3
+	      "Les " #4
+		"The " #4 organization chop.word  
+	      chop.word
+	    chop.word
+	  chop.word
+	  sortify }
+      if$
+    }
+    { author sort.format.names }
+  if$
+}
+
+FUNCTION {editor.organization.sort}
+{ editor empty$
+    { organization empty$
+        { key empty$
+            { "to sort, need editor, organization, or key in " cite$ * warning$
+              ""
+            }
+            { key sortify }
+          if$
+        }
+        { "Le " #3 
+	    "La " #3
+	      "Les " #4
+		"The " #4 organization chop.word 
+	      chop.word
+	    chop.word
+	  chop.word
+	  sortify }
+      if$
+    }
+    { editor sort.format.names }
+  if$
+}
+
+FUNCTION {presort}
+{ type$ "book" =
+  type$ "inbook" =
+  or
+    'author.editor.sort
+    { type$ "proceedings" =
+        'editor.organization.sort
+        { type$ "manual" =
+            'author.organization.sort
+            'author.sort
+          if$
+        }
+      if$
+    }
+  if$
+  "    "
+  *
+  year field.or.null sortify
+  *
+  "    "
+  *
+  title field.or.null
+  sort.format.title
+  *
+  #1 entry.max$ substring$
+  'sort.key$ :=
+}
+
+ITERATE {presort}
+
+SORT
+
+STRINGS { longest.label }
+
+INTEGERS { number.label longest.label.width }
+
+FUNCTION {initialize.longest.label}
+{ "" 'longest.label :=
+  #1 'number.label :=
+  #0 'longest.label.width :=
+}
+
+FUNCTION {longest.label.pass}
+{ number.label int.to.str$ 'label :=
+  number.label #1 + 'number.label :=
+  label width$ longest.label.width >
+    { label 'longest.label :=
+      label width$ 'longest.label.width :=
+    }
+    'skip$
+  if$
+}
+
+EXECUTE {initialize.longest.label}
+
+ITERATE {longest.label.pass}
+
+FUNCTION {begin.bib}
+{ preamble$ empty$
+    'skip$
+    { preamble$ write$ newline$ }
+  if$
+  "\begin{thebibliography}{"  longest.label  * "}" * write$ newline$
+  "\expandafter\ifx\csname fonteauteurs\endcsname\relax" 
+    write$ newline$
+  "\def\fonteauteurs{" fr.auteurs.style * "}\fi" * write$ newline$ 
+}
+
+EXECUTE {begin.bib}
+
+EXECUTE {init.state.consts}
+
+ITERATE {call.type$}
+
+FUNCTION {end.bib}
+{ newline$
+  "\end{thebibliography}" write$ newline$
+}
+
+EXECUTE {end.bib}
+
+
+%%
+%% Changelog : 
+%% 1.0 -> 1.1 : les mois ne prennent pas de majuscule
+%% 1.1 -> 1.2 : le '.' qui suit la liste des auteurs peut etre 
+%% 		change en autre chose (':' notamment).
+%%
diff --git a/rapport.pdf b/rapport.pdf
index 8c7ac4d..d74f1c2 100644
--- a/rapport.pdf
+++ b/rapport.pdf
diff --git a/rapport.tex b/rapport.tex
index c40dae6..2e9efda 100644
--- a/rapport.tex
+++ b/rapport.tex
@@ -3,6 +3,7 @@
 \usepackage[utf8x]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage{calc,amsmath,amssymb,amsthm}
+\usepackage{mathrsfs}
 \usepackage[hmargin=3.5cm]{geometry}
 \usepackage{}
 \newcommand{\trib}[1]{\mathcal{#1}}
@@ -12,11 +13,22 @@
 \newcommand{\prob}[1]{\mathbb{P}\left[#1\right]}
 \renewcommand{\geq}{\geqslant}
 \renewcommand{\leq}{\leqslant}
+\DeclareMathOperator*{\argmax}{arg\,max}
 \newsavebox{\fmbox}
-\newenvironment{encadre}[1]{\begin{lrbox}{\fmbox}%
-\begin{minipage}{\textwidth-3em}\vspace{\baselineskip}}{\vspace{0.5\baselineskip
-} \end{minipage} \end{lrbox} \begin{center}
-\fbox{\hspace{1em}\usebox{\fmbox}\hspace{1em}}\end{center}}
+\newenvironment{encadre}[1]
+{
+\begin{lrbox}{\fmbox}
+\begin{minipage}{\textwidth-3em}
+\vspace{0.3\baselineskip}\setlength{\parskip}{0.5em}
+}
+{
+\vspace{\baselineskip}
+\end{minipage}\end{lrbox}
+\begin{center}
+\fbox{\hspace{1em}\usebox{\fmbox}\hspace{1em}}
+\end{center}
+\vspace{0.5\baselineskip}
+}
 
 \author{Thibaut Horel}
 \title{Prédiction de suites individuelles}
@@ -46,57 +58,252 @@
 
 \maketitle
 
-\section{Prédiction randomisée dans un univers fini}
+\section{Introduction : Jeu répété de prévision et regret}
+
+\section{Prévision dans un univers fini}
+Dans cette partie on souhaite choisir un action parmi un ensemble fini
+$\mathcal{A}$ d'actions. On distingue plusieurs problèmes suivant l'information
+dont on dispose  :
+\begin{itemize}
+\item On parle de \emph{bandit à N bras} si à la fin de chaque tour
+on observe uniquement la perte correspondant à l'action choisie. L'appellation
+provient du monde des casinos où le terme \emph{bandit manchot} est synonyme de
+machine à sous. Un exemple de problème de \emph{bandit à N bras} consiste donc
+à jouer sur $N$ machines en même temps, en choisissant une seule machine à
+chaque tour. On ne reçoit donc que la perte de la machine sur laquelle on a
+joué.
+\item Par opposition, on parle de problème à information parfaite si on a accès
+à la perte de toutes les actions (même celles que l'on n'a pas choisies) à la
+fin de chaque tour.
+\end{itemize}
 
-\subsection{Introduction (A DETAILLER)}
-$N$ le nombre d'actions.
+Le comportement de l'environnement influe également sur la nature du problème :
+\begin{itemize}
+\item On parle d'\emph{environnement stochastique} si les pertes reçues sont
+distribuées suivant une loi fixée à l'avance.
+\item Si l'environnement choisit de façon déterministe (et sans faire appel à
+une randomisation externe) les pertes en fonction des actions passées et de
+notre stratégie, il est dit \emph{adversaire déterministe}.
+\end{itemize}
 
-Une stratégie est une suite de lois de probabilités
-$(\mathbf{p_t})_{t\in\set{N}^*}$ avec $\mathbf{p_t}\in\set{R}^N$.
+\subsection{Bandits stochastiques}
+On joue ici sur $N$ machines à sous régies par une famille de lois de
+probabilités $(P_1,\ldots,P_N)$. Au tour $t$, le gain donné par la machine $i$
+est donc une variable aléatoire $G_{i,t}$ à valeurs dans $[0,1]$. La famille
+$(G_{1,t},\ldots,G_{N,t})$ suit la loi $P_1\otimes\ldots\otimes P_N$. Le jeu a
+donc la forme suivante :
 
-La stratégie de l'adversaire est une suite de vecteur de pertes
-$(\mathbf{l_t})_{t\in\set{N}^*}$ avec $\mathbf{l_t}\in\set{R}^N$.
+\begin{encadre}{\textwidth}
+\begin{center}
+\textbf{Jeu contre des bandits stochastiques}
+\end{center}
 
-À chaque tour, on tire $I_t$ suivant la loi $\mathbf{p_t}$.
+À chaque tour $t\geq 1$ :
+\begin{enumerate}
+\item Le joueur choisit un bras $I_t$.
+\item Les gains $(G_{1,t},\ldots,G_{N,t})$ sont tirées suivant la loi
+$P_1\otimes\ldots\otimes P_N$.
+\item Le joueur reçoit (et observe) le gain $G_{I_t,t}$. 
+\end{enumerate}
+\end{encadre}
 
-On définit le regret pour la stratégie $\mathbf{p}$ contre la stratégie
-$\mathbf{l}$ :
+La stratégie du joueur est donc une fonction $S$ :
 \[
-R_T(\mathbf{p},\mathbf{l})=\sum_{t=1}^T l_{I_t,t}-\min_{1\leq i\leq N}L_{i,T}
+S : \bigsqcup_{t\geq
+1}\big(\mathcal{A}\times[0,1]\big)^t\longrightarrow\mathcal{A}
 \]
-et le regret espéré :
+qui donne le bras à jouer en fonction du passé : $I_{t+1} =
+S\big((I_1,G_{I_1,1}),\ldots,(I_t,G_{I_t,t})\big)$.
+
+La stratégie est donc déterministe, c'est à dire que le joueur choisit le bras
+à jouer à l'instant $t$ de façon déterministe en fonction du passé, sans faire
+appel à une randomisation externe.
+
+Si on note $\trib{F}_t$ la tribu engendrée par $(G_{I_1,1},\ldots,G_{I_t,t})$,
+on voit facilement que $I_t$ est mesurable par rapport à la tribu
+$\trib{F}_{t-1}$.
+
+On note $\mu_i$ l'espérance de $P_i$, et $\mu^*=\max_{1\leq i\leq N} \mu_i$
+(plus généralement on notera avec un exposant étoile toute grandeur rattachée à
+un bras d'espérance maximale). Enfin on note :
 \[
-R_T^*(\mathbf{p},\mathbf{l}) =
-\sum_{t=1}^T\sum_{i=1}^N p_{i,t}l_{i,t}
--\min_{1\leq i\leq N}L_{i,T}
+\Delta_i = \mu^*-\mu_i
 \]
 
-Si l'on souhaite raisonner avec les gains, en posant $g_{i,t}=1-l_{i,t}$, on
-définit le regret en terme de gains :
+
+On souhaite avoir une borne sur le regret défini par :
 \[
-R_T(\mathbf{p},\mathbf{g})=
-\max_{1\leq i\leq N}G_{i,T}
--\sum_{t=1}^T g_{I_t,t}
+R_T = \esp{T\mu^* - \sum_{t=1}^T G_{I_t,t}}
+=T\mu^* - \sum_{i=1}^N \mu_i \esp{N_i(T)}
+=\sum_{i=1}^N \esp{N_i(T)}\Delta_i
 \]
-et le regret espéré en termes de gains :
+où $N_i(T)$ désigne le nombre de fois que le bras $i$ a été joué jusqu'à
+l'instant $t$ :
 \[
-R_T^*(\mathbf{p},\mathbf{g}) =
-\max_{1\leq i\leq N}G_{i,T}
--\sum_{t=1}^T\sum_{i=1}^N p_{i,t}g_{i,t}
+N_i(T)=\sum_{t=1}^T \mathbf{1}_{\{I_t=i\}}
+\]
+
+On utilise la stratégie de prévision suivante :
+
+\begin{encadre}{\textwidth}
+\begin{center}
+\textbf{Stratégie \textsc{ucb} pour les bandits stochastiques}
+\end{center}
+
+\textbf{Initialisation :} On joue chaque bras une fois au cours des $N$
+premiers tours.
+
+À chaque tour $t> N$ :
+\begin{enumerate}
+\item On estime $\mu_i$ à l'aide des données passées :
+\[
+\widehat{\mu}_{i,t-1} = \frac{1}{N_i(t-1)}
+\sum_{k=1}^{t-1}G_{I_k,k}\mathbf{1}_{\{I_k=i\}}
+\]
+\item On choisit un bras $I_t$ qui vérifie :
+\[
+I_t= \argmax_{1\leq i\leq N}
+\left(\widehat{\mu}_{i,t-1}+\sqrt{\frac{2\ln (t-1)}{N_i(t-1)}}\;\right)
 \]
+\item On observe $G_{I_t,t}$. 
+\end{enumerate}
+\end{encadre}
 
-\begin{rem}
-Les symboles $R_T$ et $R_T^*$ n'ont pas la même définition suivant que l'on
-parle de gains et de pertes. Si $\mathbf{g}$ est la suite de gains associée à
-la suite de pertes $\mathbf{l}$, on a les égalités :
+\begin{thm}
+Si les lois $(P_1,\ldots,P_N)$ des machines sont à support dans $[0,1]$, le
+regret au temps T vérifie :
 \[
-R_T(\mathbf{p},\mathbf{l})=R_T(\mathbf{p},\mathbf{g})
+R_T\leq
+\left(8\sum_{\mu_i<\mu^*}\frac{1}{\Delta_i}\right) \ln T
++\left(1+\frac{\pi^2}{3}\right)\left(\sum_{i=1}^N \Delta_i\right)
+\]
+\end{thm}
+
+\begin{proof}On note :
+\[
+c_{t,s}=\sqrt{\frac{2\ln t}{s}}
+\]
+
+Vu la forme du regret, on va majorer $\esp{N_i(T)}$. Dans cette preuve, on
+notera $\{A\}$ au lieu de $\mathbf{1}_A$. Pour tout entier $\ell\geq 1$, on a :
+\[
+\begin{split}
+N_i(T)&\leq \ell + \sum_{t=N+1}^T \left\{I_t=i,\ N_i(t-1)\geq l\right\}\\
+&\leq\ell + \sum_{t=N+1}^T\left\{\widehat{\mu}^*_{t-1}+c_{t-1,N^*(t-1)}
+\leq \widehat{\mu}_{i,t-1}+c_{t-1,N_i(t-1)},\ N_i(t-1)\geq l\right\}\\
+&\leq\ell + \sum_{t=N+1}^{T}\sum_{s=1}^{t-1}\sum_{s'=l}^{t-1}
+\left\{\bar{X}^*_{s}+c_{t-1,s}
+\leq\bar{X}_{i,s'}+c_{t-1,s'}\right\}
+\end{split}
+\]
+où on a noté :
+\[
+\bar{X}_{i,s}=\frac{1}{s}\sum_{k=1}^s X_{i,k}
 \quad\mathrm{et}\quad
-R_T^*(\mathbf{p},\mathbf{l})=R_T^*(\mathbf{p},\mathbf{g})
+\bar{X}^*_{s}=\frac{1}{s}\sum_{k=1}^s X^*_{k}
+\]
+avec $(X_{i,k})_{k\geq 1}$ une suite iid de variables aléatoires de loi $P_i$
+et $(X^*_{k})_{k\geq 1}$ une suite iid de variables aléatoires de loi $P^*$.
+Le dernier évènement qui apparaît dans la somme est inclus dans l'union des
+trois événements suivants :
+\begin{equation}\label{ev1}
+\bar{X}^*_{s}\leq\mu^*-c_{t-1,s}
+\end{equation}
+\begin{equation}\label{ev2}
+\bar{X}_{i,s}\geq\mu_i+c_{t-1,s'}
+\end{equation}
+\begin{equation}\label{ev3}
+\mu^*<\mu_i + 2 c_{t-1,s'}
+\end{equation}
+
+L'inégalité de Hoeffding permet de majorer les probabilités des événements
+\eqref{ev1} et \eqref{ev2} par $\exp(-4\ln t)=t^{-4}$. Enfin, si
+$\ell=\lceil (8\ln T)/\Delta_i^2\rceil$, l'événement \eqref{ev3} est de
+probabilité nulle. En effet, on a alors :
+\[
+2c_{t-1,s'}\leq 2\sqrt{\frac{2\Delta_i^2\ln (t-1)}{8\ln T}}\leq\Delta_i =
+\mu^*-\mu_i
+\]
+
+On a donc :
+\[
+\esp{N_i(T)}\leq \left\lceil \frac{8\ln T}{\Delta_i^2}\right\rceil
++\sum_{t=1}^{\infty}t^2\frac{2}{t^4}
+\]
+
+On multiplie alors par $\Delta_i$ et on somme sur $1\leq i \leq N$ pour avoir
+le résultat.
+\end{proof}
+
+\subsection{Adversaire déterministe et information totale}
+Maintenant, l'environnement se comporte comme un adversaire : il connaît notre
+stratégie, observe nos actions passées, et peut donc choisir les pertes de façon
+à maximiser notre regret. Face à un tel adversaire, on s'autorise à faire
+appel à une randomisation externe dans notre stratégie. Ceci rétablit une
+certaine forme d'équilibre car l'adversaire n'a pas de contrôle sur l'aléa. La
+prévision prend alors la forme du jeu répété suivant :
+
+\begin{encadre}{\textwidth}
+\begin{center}
+\textbf{Jeu de prévision contre un adversaire déterministe}
+\end{center}
+À chaque tour $t\geq 1$ :
+\begin{enumerate}
+\item Le joueur choisi un loi de probabilité sur $\mathcal{A}$,
+$P_t=(p_{1,t},\ldots,p_{N,t})$
+\item Simultanément, l'adversaire choisi un vecteur de perte,
+$\ell_t=(\ell_{1,t},\ldots,\ell_{N,t})$
+\item Le joueur tire $I_t$ suivant la loi $P_t$ et choisit l'action
+$I_t$. Il reçoit la perte $\ell_{I_t,t}$.
+\item Le joueur et l'adversaire observent le vecteur de perte $\ell_t$ et
+l'action $I_t$.
+\end{enumerate}
+\end{encadre}
+
+On note $\Delta(\mathcal{A})$ l'ensemble des lois de probabilités sur
+$\mathcal{A}$ (qui s'injecte canoniquement dans $\mathcal{A}^N$).
+
+La stratégie de prévision du joueur est donc la donnée d'une loi initiale
+$P_1$, et d'une fonction $P$ :
+\[
+P : \bigsqcup_{t\geq 1}\left(\mathcal{A}\times\set{R}^N\right)^{t}
+\longrightarrow\Delta(\mathcal{A})
+\]
+qui donne la loi $P_t$ en fonction des actions et des pertes passées.
+
+La stratégie de l'adversaire est une fonction $E$ :
+\[
+\ell : \bigsqcup_{t\geq 1}\left(\mathcal{A}\times\set{R}^N\right)^{t}
+\longrightarrow\mathcal{A}^N
+\]
+qui donne le vecteur de perte $\ell_t$ en fonction du passé.
+
+Si on note $\trib{F}_t$ la tribu engendrée par $(I_1,\ldots,I_t)$,
+l'information disponible au début du tour $t$ est donc la tribu
+$\trib{F}_{t-1}$ et les variables $P_t$ et $\ell_t$ sont
+mesurables par rapport à cette tribu.
+
+Le regret jusqu'au temps $T$ de la stratégie $P$ contre la stratégie $\ell$ est
+défini par :
+\[
+R_T(P,\ell)
+=\sum_{t=1}^T \ell_{I_t,t}-\min_{1\leq i\leq N}L_{i,T}
+=\sum_{t=1}^T \ell_{I_t,t}-\min_{1\leq i\leq N}\sum_{t=1}^T \ell_{i,t}
+\]
+et le regret espéré :
+\[
+\bar{R}_T(P,\ell) = \esp{R_T} =
+\sum_{t=1}^T\sum_{i=1}^N p_{i,t}\ell_{i,t}
+-\min_{1\leq i\leq N}L_{i,T}
 \]
-\end{rem}
 
-\subsection{Stratégie exponentielle et information totale}
+Par la suite, on raisonnera plutôt en termes de gains : on pose
+$g_{i,t}=1-\ell_{i,t}$, on a donc :
+\[
+\bar{R}_T(P,\ell) = 
+\max_{1\leq i\leq N}\sum_{t=1}^T g_{i,t}
+-\sum_{t=1}^T\sum_{i=1}^N p_{i,t}g_{i,t}
+\]
 
 \begin{encadre}{\textwidth}
 \begin{center}
@@ -110,8 +317,8 @@ $i\in\{1,\ldots,N\}$.
 
 \begin{enumerate}
 \item On tire $I_t\in\{1,\ldots,N\}$ suivant la loi
-$\mathbf{p_t}=(p_{1,t},\ldots,p_{N,t})$.
-\item On observe les gains $\mathbf{g_t}=(g_{1,t},\ldots,g_{N,t})$.
+$P_t=(p_{1,t},\ldots,p_{N,t})$.
+\item On observe les gains $g_t=(g_{1,t},\ldots,g_{N,t})$.
 \item On met à jour les poids exponentiels :
 $w_{i,t}=w_{i,t-1}\exp(\eta g_{i,t})$.
 \item On calcule la distribution pour le tour suivant :
@@ -123,10 +330,11 @@ p_{i,t+1}=w_{i,t}/W_t
 \end{encadre}
 
 \begin{thm}\label{infototale}
-On suppose que les gains sont bornés par $K\geq 0$, alors, pour tout $\eta$ tel
+On suppose que les gains sont à valeurs dans $]-\infty, K]$, alors, pour
+tout $\eta$ tel
 que $\eta K\leq 1$ :
 \[
-R_n^* \leq\frac{\ln N}{\eta}
+\bar{R}_T \leq\frac{\ln N}{\eta}
 +\eta\sum_{t=1}^T\sum_{i=1}^N p_{i,t}g_{i,t}^2
 \]
 \end{thm}
@@ -166,7 +374,13 @@ Enfin on réordonne et on divise par $\eta$.
 \end{proof}
 
 \subsection{Bandits à $N$ bras}
-On utilise la stratégie de prédiction suivante :
+
+Contrairement au paragraphe précédent, on observe à la fin de chaque tour
+uniquement le gain correspondant à l'action choisie. On ne peut donc pas
+utiliser directement la stratégie par poids exponentiels, car on ne peut plus
+évaluer la perte cumulée de chaque action. L'idée est donc d'estimer les pertes
+qui ne nous sont pas révélées. On utilise pour cela un estimateur biaisé (grâce
+au paramètre $\beta$) : 
 
 \begin{encadre}{\textwidth}
 \begin{center}
@@ -180,9 +394,9 @@ $i\in\{1,\ldots,N\}$.
 
 \begin{enumerate}
 \item On tire $I_t\in\{1,\ldots,N\}$ suivant la loi
-$\mathbf{p_t}=(p_{1,t},\ldots,p_{N,t})$.
+$P_t=(p_{1,t},\ldots,p_{N,t})$.
 \item On estime les gains
-$\mathbf{\widetilde{g}}=(\widetilde{g}_{1,t},\ldots,\widetilde{g}_{N,t})$ à
+$\widetilde{g}=(\widetilde{g}_{1,t},\ldots,\widetilde{g}_{N,t})$ à
 partir du gain observé $g_{I_t,t}$
 \[
 \widetilde{g}_{i,t}=\frac{1}{p_{i,t}}\left(g_{I_t,t}\mathbf{1}_{\{I_t=i\}}
@@ -209,167 +423,328 @@ Avec  :
 
 \subsection{Preuve}
 On a une information totale sur les gains estimés, donc en notant
-$q_{i,t}=w_{i,t}/W_t$ et en appliquant le résultat du théorème \ref{infototale}
-: 
-\begin{equation}\label{fullinfo}
-R_T^*(\mathbf{q},\mathbf{\widetilde{g}})\leq\frac{\ln N}{\eta}
+$q_{i,t}=w_{i,t-1}/W_{t-1}$ et en appliquant le résultat du théorème
+\ref{infototale} : 
+\[
+\bar{R}_T(Q,\widetilde{g})
+=\max_{1\leq i\leq N}\sum_{t=1}^T \widetilde{g}_{i,t}
+-\sum_{t=1}^T\sum_{i=1}^N q_{i,t}g_{i,t}
+\leq\frac{\ln N}{\eta}
 +\eta\sum_{t=1}^T\sum_{i=1}^N q_{i,t}\widetilde{g}_{i,t}^2
-\end{equation}
+\]
 
-puis en utilisant que $p_{i,t}\geq q_{i,t}(1-\gamma)$
+puis en multipliant par $1-\gamma$ et en remarquant que 
+$p_{i,t}\geq q_{i,t}(1-\gamma)$ :
 \[
-\begin{split}
-R_T^*(\mathbf{p},\mathbf{\widetilde{g}})
-&\leq\max_{1\leq i\leq N}\widetilde{G}_{i,T}
--(1-\gamma)\sum_{t=1}^T\sum_{i=1}^N q_{i,t}\widetilde{g}_{i,t}\\
-&=(1-\gamma)R_n^*(\mathbf{q},\mathbf{\widetilde{g}})
-+\gamma\max_{1\leq i\leq N}\widetilde{G}_{i,T}
-\end{split}
+(1-\gamma)\max_{1\leq i\leq N}\sum_{t=1}^T \widetilde{g}_{i,t}
+-\sum_{t=1}^T\sum_{i=1}^N p_{i,t}\widetilde{g}_{i,t}
+\leq\frac{\ln N}{\eta}
++\eta\sum_{t=1}^T\sum_{i=1}^N p_{i,t}\widetilde{g}_{i,t}^2
 \]
-et en utilisant \eqref{fullinfo} :
+d'où en réordonnant et en utilisant la définition du regret :
 \begin{equation}\label{base}
-R_T^*(\mathbf{p},\mathbf{\widetilde{g}})
-\leq \frac{\ln N}{\eta}
+\bar{R}_T(P,\widetilde{g})
+=\max_{1\leq i\leq N}\sum_{t=1}^T \widetilde{g}_{i,t}
+-\sum_{t=1}^T\sum_{i=1}^N p_{i,t}\widetilde{g}_{i,t}
+\leq\frac{\ln N}{\eta}
 +\eta\sum_{t=1}^T\sum_{i=1}^N p_{i,t}\widetilde{g}_{i,t}^2
 +\gamma\max_{1\leq i\leq N}\widetilde{G}_{i,T}
 \end{equation}
 
 Vu le choix de l'estimateur de gain, on a :
-\[
+\begin{equation}\label{estimation}
 \sum_{t=1}^T\sum_{i=1}^N p_{i,t}\widetilde{g}_{i,t}
 =\sum_{t=1}^T\sum_{i=1}^N
 \left(g_{I_t,t}\mathbf{1}_{\{I_t=i\}}+\beta\right)=\sum_{t=1}^T g_{I_t,t}
 +TN\beta
-\]
+\end{equation}
 et :
-\[
+\begin{equation}\label{estimation2}
 \sum_{t=1}^T\sum_{i=1}^N p_{i,t}\widetilde{g}_{i,t}^2
+\leq\sum_{i=1}^N\sum_{t=1}^T\widetilde{g}_{i,t}\left(g_{I_t,t}\mathbf{1}_{\{
+I_t=i\}}+\beta\right)
 \leq(1+\beta)\sum_{i=1}^N\widetilde{G}_{i,t}
 \leq(1+\beta)N\max_{1\leq i\leq N}\widetilde{G}_{i,t}
-\]
+\end{equation}
 
-D'où on déduit de \eqref{base}, en posant $C=\gamma+\eta(1+\beta)N$:
+En utilisant \eqref{estimation}, on a :
+\[
+\bar{R}_T(P,\widetilde{g})
+=\max_{1\leq i\leq N}\widetilde{G}_{i,T}
+-\sum_{t=1}^T g_{I_t,t} - TN\beta
+=R_T(P,g)
++\max_{1\leq i\leq N}\widetilde{G}_{i,T}
+-\max_{1\leq i\leq N}G_{i,T}
+-TN\beta
+\]
+On injecte dans\eqref{base} et on utilise la majoration \eqref{estimation2} :
 \[
-R_T(\mathbf{p},\mathbf{g})
+R_T(P,g)
 +\max_{1\leq i\leq N}\widetilde{G}_{i,T}
 -\max_{1\leq i\leq N}G_{i,T}
 \leq \frac{\ln N}{\eta}
++\big(\gamma+\eta(1+\beta) N\big)\max_{1\leq i\leq N}\widetilde{G}_{i,t}
 +TN\beta
-+C\max_{1\leq i\leq N}\widetilde{G}_{i,t}
 \]
+\[
+\begin{split}
+R_T(P,g)
+&\leq \frac{\ln N}{\eta}
++\big(1-\gamma-\eta(1+\beta) N\big)\left(\max_{1\leq i\leq N}G_{i,T}
+-\max_{1\leq i\leq N}\widetilde{G}_{i,t}\right)\\
+&\qquad+\big(\gamma+\eta(1+\beta) N\big)\max_{1\leq i\leq N}G_{i,T}+TN\beta
+\end{split}
+\]
+
+puis vu que $1-\gamma-\eta(1+\beta) N\leq 1$ et $\max_{1\leq i\leq N}
+G_{i,T}\leq T$
+
 \begin{equation}\label{etape2}
-R_T(\mathbf{p},\mathbf{g})
+R_T(P,g)
 \leq \frac{\ln N}{\eta}
-+TN\beta
-+\max_{1\leq i\leq N}G_{i,T}
-+(C-1)\max_{1\leq i\leq N}\widetilde{G}_{i,t}
++\left(\max_{1\leq i\leq N}G_{i,T}
+-\max_{1\leq i\leq N}\widetilde{G}_{i,t}\right)
++\big(\gamma+\eta(1+\beta)N\big)T+TN\beta
 \end{equation}
 
-% On souhaite maintenant relier les gains estimés aux gains
-% réels. On remarque que :
-% \[
-% \espc{g_{i,t}-\widetilde{g}_{i,t}}{\trib{F}_{t-1}}=-\frac{\beta}{p_{i,t}}
-% .\]
-% Donc le processus $(X_t)$ défini par
-% \[
-% X_t=g_{i,t}-\widetilde{g}_{i,t}+\frac{\beta}{p_{i,t}}
-% ,\]
-% est une suite d'accroissements de martingale.
-
-On souhaite maintenant relier les gains estimés aux gains
-réels. On choisit $\beta=\sqrt{\ln(N/\delta)/TN}$, alors avec probabilité
-supérieure à $1-\delta$ :
+Il nous reste à majorer l'écart entre les gains et les gains estimés. On
+souhaite appliquer le corollaire \ref{megabernstein} pour la suite
+d'accroissement de martingale $(X_{i,t})_{1\leq t\leq T}$ définie par :
 \[
-\max_{1\leq i\leq N} G_{i,T}-\max_{1\leq i\leq N}\widetilde{G}_{i,T}
-\leq TN\beta = \sqrt{TN\ln\left(\frac{N}{\delta}\right)}
+X_{i,t}=g_{i,t}-\widetilde{g}_{i,t}+\frac{\beta}{p_{i,t}}
+= g_{i,t}\left(1-\frac{\mathbf{1}_{\{I_t=i\}}}{p_{i,t}}\right)
 \]
-d'où en remarquant que $0\leq 1-C\leq 1$ :
+Cette suite est clairement une suite d'accroissements de martingale car on a
+retranché le terme de biais. La variance conditionnelle vaut :
 \[
-(1-C)\max_{1\leq i\leq N} G_{i,T}
-+(C-1)\max_{1\leq i\leq N}\widetilde{G}_{i,T}
-\leq TN\beta
+\begin{split}
+\mathbb{E}_t[X_{i,t}^2]=\espc{X_{i,t}^2}{\trib{F}_{t-1}}
+&=g_{i,t}^2\,\mathbb{E}_t\Bigg[\bigg(1-\frac{\mathbf{1}_{\{I_t=i\}}}{p_{i,t}}
+\bigg)^2\Bigg]\\
+&=g_{i,t}^2\left(\frac{1}{p_{i,t}}-1\right)
+\end{split}
+\]
+On applique le corollaire \ref{megabernstein} avec $K=1$ et
+$\sigma_T^2=NT/\gamma$. On a donc avec probabilité supérieure à $1-\delta/N$ :
+\[
+G_{i,T}-\widetilde{G}_{i,T} + \beta\sum_{t=1}^T\frac{1}{p_{i,t}}
+\leq \sqrt{3\sum_{t=1}^T\frac{1}{p_{i,t}}\ln\left(\frac{N\ln
+(NT/\gamma)}{\delta/4}\right)}
++\frac{2}{3}\ln\left(\frac{N\ln(NT/\gamma)}{\delta/4}\right)
 \]
-puis, vu que $\max_{1\leq i\leq N} G_{i,T}\leq T$ :
+puis en majorant l'intersection des évènements précédents, on a avec
+probabilité supérieure à $1-\delta/N$ :
 \[
-\max_{1\leq i\leq N} G_{i,T}
-+(C-1)\max_{1\leq i\leq N}\widetilde{G}_{i,T}
-\leq TN\beta+CT
+a
 \]
+
 en injectant cette dernière majoration dans \eqref{etape2}, on obtient
 finalement :
 \[
-R_T(\mathbf{p},\mathbf{g})
+R_T(P,g)
 \leq \frac{\ln N}{\eta}
-+2TN\beta+\big(\gamma+\eta(1+\beta)\big)T
++\beta T(N-1)+\big(\gamma+\eta(1+\beta)\big)T
++\sqrt{2\frac{NT}{\gamma}\ln\left(\frac{1}{\delta}\right)}
++\frac{2}{3}\ln\left(\frac{1}{\delta}\right)
 \]
 
-\section{Prédiction dans un univers convexe}
-Soit $F$ un convexe compact de $\set{R}^n$. À chaque tour on choisit un
-élément $x_t$ dans $F$ tandis que l'adversaire choisit une fonction de perte
-convexe $l_t : F\longrightarrow \set{R}^+$.
+\section{Prévision dans un univers convexe}
+
+Soit $C$ un convexe compact de $\set{R}^n$. On joue au même jeu que
+précédemment, mais au lieu de choisir une action parmi un ensemble fini, on
+choisit un élément de $C$. L'environnement ne choisit donc pas un vecteur de
+perte, mais une fonction de perte définie sur $C$ tout entier.
 
-On définit le regret par rapport à une stratégie fixe :
+On note $\Pi_C$ la projection sur le convexe $C$ définie comme au paragraphe
+\ref{projection}.
+
+
+\subsection{Cas de l'information parfaite}
+
+Ici, le joueur et l'environnement choisissent leurs actions de façon
+déterministe
+en fonction du passé :
+
+\begin{encadre}{\textwidth}
+\begin{center}
+\textbf{Jeu à information parfaite dans un univers convexe}
+\end{center}
+À chaque tour $t\geq 1$ :
+\begin{itemize}
+\item Le joueur choisit un vecteur $x_t\in C$.
+\item Simultanément, l'adversaire choisit une fonction de perte convexe $\ell_t
+: C\to\set{R}^+$.
+\item Le joueur et l'environnement observent la fonction $\ell_t$ et reçoit la
+perte
+$\ell_t(x_t)$.
+\end{itemize}
+\end{encadre}
+
+Plus précisément, notons $\mathscr{C}$ l'ensemble des fonctions de $C$ dans
+$\set{R}^+$. Le passé avant $t$, noté $H_t$, est une partie de $(C\times
+\mathscr{C})^{t-1}$ :
 \[
-R_T(\mathbf{x},\mathbf{l})=\sum_{t=1}^T l_t(x_t) - \min_{x\in F}\sum_{t=1}^T
-l_t(x)
+H_t = \big((x_1,\ell_1),\ldots,(x_{t-1},\ell_{t-1})\big)
 \]
 
-On veut obtenir des bornes sur le regret valables pour toute stratégie de
-l'adversaire.
+La stratégie du joueur est donc une fonction $S$ : 
+\[
+S : \bigsqcup_{t\geq 1}(C\times\mathscr{C})^t 
+\longrightarrow C
+\]
+et on a $x_t = S(H_t)$.
 
-\subsection{Réduction au cas des pertes linéaires}
+De même, la stratégie de l'environnement est une fonction $E$ :
+\[
+E : \bigsqcup_{t\geq 1}(C\times\mathscr{C})^t 
+\longrightarrow \mathscr{C}
+\]
+avec $\ell_t = E(H_t)$.
+
+On définit alors le regret pour la stratégie $S$ contre la stratégie $E$ par :
+\[
+R_T(S,E)=\sum_{t=1}^T \ell_t(x_t)-\min_{x\in C}\sum_{t=1}^T\ell_t(x)
+=\max_{x\in C}\left(\sum_{t=1}^T \ell_t(x_t)-\ell_t(x)\right)
+\]
 
 \begin{prop}
-Pour toute stratégie convexe $\mathbf{l}$ de l'adversaire, il existe une
-stratégie linéaire $\mathbf{l'}$ de l'adversaire telle que :
+Soit $(\ell_1,\ldots,\ell_T)$ une famille de fonctions de pertes convexes et
+différentiables, alors on a :
 \[
-R_T(\mathbf{x},\mathbf{l})\leq R_T(\mathbf{x},\mathbf{l'})
+R_T\leq
+\sum_{t=1}^T \nabla \ell_t(x_t)\cdot x_t-\min_{x\in C}\sum_{t=1}^T
+\nabla \ell_t(x_t)\cdot x
 \]
 \end{prop}
 
-\begin{proof} (A PRECISER)
-Il suffit de considérer une suite de sous-gradients des fonctions $l_t$.
+\begin{proof}
+Soit $t\in\{1,\ldots,T\}$, $\ell_t$ est au-dessus de ses tangentes, d'où :
+\[
+\forall x\in C,\quad
+\ell_t(x_t)-\ell_t(x)
+\leq \nabla \ell_t(x_t)\cdot(x_t-x)
+\]
+
+En sommant sur $t$, on obtient :
+\[
+\forall x\in C,\quad
+\sum_{t=1}^T \ell_t(x_t)-\ell_t(x)
+\leq \sum_{t=1}^T \ell'_t\cdot(x_t- x)
+\]
+
+On prend alors le maximum sur $x\in C$ pour obtenir le résultat.
 \end{proof}
 
-On peut donc se restreindre à l'étude du regret pour des fonctions de perte
-linéaire. Dans ce cas on identifie la fonction de perte est représentée par un
-vecteur que l'on nomme également $l_t$. On a donc $l_t(x_t)=l_t\cdot x_t$.
+Le terme de droite de l'inégalité de la proposition précédente est un terme de
+regret pour la suite de fonctions de perte $(\nabla \ell_t(x_t)$. À partir
+d'une borne sur le regret valable pour toutes fonctions de perte linéaires, on
+peut donc obtenir une borne valable pour des fonctions de perte convexes.
 
-\subsection{Cas de l'information totale}
+On supposera donc par la suite que les fonctions de pertes sont linéaires, on
+peut donc les assimiler au produit scalaire par un vecteur de $\set{R}^N$, que
+l'on notera également $\ell_t$ par abus.
 
-On considère ici qu'à chaque tour on observe le vecteur $l_t$ (et pas seulement
-$l_t\cdot x_t$).
-On fixe une suite $(\eta_t)_{t\in\set{N}^*}$. On utilise alors la stratégie
-suivante :
-\[
-x_{t+1}=p_F(x_t-\eta_t l_t)
-\]
-où $p_F$ est la projection sur le convexe fermé $F$ comme rappelée en
-appendice.
+\begin{encadre}{\textwidth}
+\begin{center}
+\textbf{Descente de gradient}
+\end{center}
 
-Dans le cas d'une fonction de perte linéaire, le vecteur $l_t$ s'identifie au
-gradient de la fonction de perte. La stratégie ci-dessus s'apparente donc à une
-descente de gradient gloutonne.
+\textbf{Donnée :} une suite décroissante $(\eta_t)_{t\in\set{N}^*}$ de réels
+strictement positifs.
+
+\textbf{Initialisation :} On joue $x_1$ n'importe où dans $C$.
+
+À chaque tour $t\geq 2$ :
+\begin{itemize}
+\item On calcule $y_t = x_{t-1}-\eta_{t-1} \ell_{t-1}$
+\item On joue $x_t = \Pi_C(y_t)$
+\end{itemize}
+\end{encadre}
 
 \begin{thm}
-Si toutes les fonctions de pertes sont bornées par la même constante $K$ et que
-l'on note $D$ le diamètre du compact $F$, alors on a :
+On note $D$ le diamètre de $C$ pour la norme euclidienne. On suppose que les
+vecteurs de perte $\ell_t$ sont bornés par une constante $K$ pour cette même
+norme. Alors, pour la stratégie de descente de gradient :
 \[
-R_T(\mathbf{x},\mathbf{l})
+R_T=\sum_{t=1}^T\ell_t\cdot x_t - \min_{x\in C}\sum_{t=1}^T\ell_t\cdot x
 \leq\frac{D^2}{2\eta_T}
 +\frac{K^2}{2}\sum_{t=1}^T \eta_t
 \]
 
 En particulier, en prenant $\eta_t=\frac{1}{\sqrt{t}}$, on obtient :
 \[
-R_T(\mathbf{x},\mathbf{l})
+R_T
 \leq\frac{D^2}{2}\sqrt{T}
 +K^2\left(\sqrt{T}-\frac{1}{2}\right)
 \]
 \end{thm}
 
-\begin{proof} (A PRECISER) majoration du corollaire \ref{projection} +
-transformation d'Abel.
+\begin{proof}
+Soit $x\in C$, d'après l'inégalité \eqref{inegprojection}, on a :
+\[
+\forall t\geq 0,\quad \| x_{t+1} -x\|^2= \| \Pi_C(y_{t+1}) -x\|^2
+\leq \|y_{t+1} -x\|^2
+\]
+D'où :
+\[
+\begin{split}
+\| x_{t+1} -x\|^2
+&\leq \| x_t-\eta_t\ell_t -x\|^2\\
+&=\| x_t -x\|^2-2\eta_t\ell_t\cdot(x_t-x)+\eta_t^2\|\ell_t\|^2
+\end{split}
+\]
+Puis en réordonnant et en majorant $\|\ell_t\|$ par $K$ :
+\[
+\ell_t\cdot(x_t-x)
+\leq \frac{1}{2\eta_t}\big(\| x_t -x\|^2-\| x_{t+1} -x\|^2\big)
++\frac{\eta_t}{2}K^2
+\]
+
+On somme pour $t\in\{1,\ldots,T\}$ :
+\[
+\sum_{t=1}^T \ell_t\cdot(x_t-x)
+\leq \frac{1}{2\eta_1}\| x_1 -x\|^2 
++\frac{1}{2}
+\sum_{t=2}^T\left(\frac{1}{\eta_t}-\frac{1}{\eta_{t-1}}\right)\| x_t-x\|^2
+-\frac{1}{2\eta_T}\| x_{T+1} -x\|^2 
++\frac{K^2}{2}\sum_{t=1}^T \eta_t
+\]
+
+En majorant $\| x_t-x\|^2$ par $D^2$, on fait apparaître une somme
+télescopique :
+\[
+ \sum_{t=1}^T \ell_t\cdot(x_t-x)
+\leq \frac{D^2}{2\eta_T}+\frac{K^2}{2}\sum_{t=1}^T \eta_t
+\]
+
+On obtient la première inégalité du théorème en prenant le maximum sur $x\in
+C$. Pour la seconde inégalité, il suffit de majorer $\sum_{t=1}^T 1/\sqrt{t}$ :
+\[
+\sum_{t=1}^T \frac{1}{\sqrt{t}}
+\leq 1+\int_{1}^T \frac{\mathrm{d}t}{\sqrt t}
+=2\sqrt T -1
+\qedhere\]
+\end{proof}
+
+Dans le théorème précédent, le choix du $\eta_t$ est arbitraire. Le choix
+optimal pour $\eta_t$ dépend bien sûr des constantes $K$ et $D$. Cependant,
+si le diamètre $D$ est généralement connu, ce n'est pas toujours le cas de
+$K$, la borne des fonctions de perte. Pour un bon choix des paramètres $\eta_t$,
+on peut toutefois avoir une borne satisfaisante pour le regret :
+
+\begin{thm}
+Avec les mêmes hypothèses que précédemment, posons :
+\[
+V_t = \sum_{k=1}^t \|l_t\|^2
+\]
+alors, pour le choix $\eta_t=D/\sqrt{V_{t-1}}$ on a :
+\[
+R_T\leq \sqrt{6} DK\sqrt{T} + \ldots
+\]
+\end{thm}
+
+\begin{proof}
+$\ldots$ est une constante dépendant de $K$. FINIR LE CALCUL
 \end{proof}
 
  
@@ -438,13 +813,19 @@ si :
 \[
 \forall t \in \set{N}^*,\quad \espc{X_t}{\trib{F}_{t-1}}=0,
 \]
-ce qui signifie également que le processus $(Y_T)$ défini par :
+ce qui signifie également que le processus $(S_T)$ défini par :
 \[
-\forall T \in \set{N}^*,\quad Y_T=\sum_{t=1}^{T}X_t
+\forall T \in \set{N}^*,\quad S_T=\sum_{t=1}^{T}X_t
 \]
 est une martingale par rapport à la filtration $(\trib{F}_T)$.
 \end{defi}
 
+On notera également :
+\[
+V_T=\sum_{t=1}^T
+\espc{X_t^2}{\trib{F}_{t-1}}
+\]
+
 \begin{lem}\label{lembernstein}
 Soient $X$ une variable aléatoire avec $X\leq 1$ et $\trib{F}$ une tribu telle
 que $\espc{X}{\trib{F}}=0$, alors :
@@ -460,7 +841,7 @@ On a $\eta X\leq \eta$, donc grâce au lemme \ref{inegexp} :
 \exp(\eta X)-\eta X-1\leq X^2(e^\eta-\eta-1)
 .\]
 
-En prenant de part et d'autre l'espérance conditionelle :
+En prenant de part et d'autre l'espérance conditionnelle :
 \[
 \espc{\exp(\eta X)}{\trib{F}}\leq 1 +(e^\eta-\eta-1)\espc{X^2}{\trib{F}}
 .\]
@@ -472,7 +853,7 @@ On conclut alors en utilisant $1+u\leq\exp(u)$ si $u\in\set{R}$.
 Soit $(X_t)$ une suite d'accroissements de martingale majorée par 1. Notons :
 \[
 \forall T \in \set{N}^*, \quad S_T=\sum_{t=1}^T X_t, \quad V_T=\sum_{t=1}^T
-\espc{X_t^2}{\trib{F}_{t-1}},
+\espc{X_t^2}{\trib{F}_{t-1}}
 ,\]
 et définissons, pour $\eta\in\set{R}^+$, le processus $(M_t)$ par :
 \[
@@ -508,21 +889,21 @@ ainsi l'inégalité de Bernstein :
 \begin{prop}
 Soit $(X_1,\ldots,X_T)$ une suite d'accroissements de
 martingale par rapport à la filtration $(\trib{F}_t)$. Supposons
-qu'ils existent $K>0$ tel que :
+qu'il existe $K>0$ tel que :
 \[
 \forall t \in \{1,\ldots,T\},\quad X_t\leq K,
 \]
-et $\sigma^2$ tel que $V_T\leq \sigma^2$, alors avec les mêmes notations que
-précédemment, on a:
+et $\sigma^2_T\geq 0$ tel que $V_T\leq \sigma^2_T$ presque sûrement, alors avec
+les mêmes notations que précédemment, on a:
 \[
 \forall\varepsilon>0,\quad
 \prob{\max_{1\leq t\leq T}S_t>\varepsilon}
 \leq
-\exp\left(-\frac{\varepsilon^2}{2\sigma^2+\frac{2}{3}K\varepsilon}\right)
+\exp\left(-\frac{\varepsilon^2}{2\sigma_T^2+\frac{2}{3}K\varepsilon}\right)
 \]
 \end{prop}
 
-\begin{proof} Par homogénéité, on se ramène aux cas où
+\begin{proof} Par homogénéité, on se ramène au cas où
 $K=1$. Soit $\varepsilon$ fixé, on a, pour tout
 $\eta\in\set{R}^+$ :
 \[
@@ -530,7 +911,7 @@ $\eta\in\set{R}^+$ :
 \prob{\max_{1\leq t\leq T}S_t>\varepsilon}
 &=\prob{\eta\max_{1\leq t\leq T}S_t>\eta\varepsilon} \\
 &\leq \prob{\max_{1\leq t\leq T}M_t
->\exp\left(\eta\varepsilon-\sigma^2(e^\eta-\eta-1)\right)}
+>\exp\left(\eta\varepsilon-\sigma_T^2(e^\eta-\eta-1)\right)}
 \end{split}
 \]
 
@@ -539,28 +920,30 @@ lemme \ref{lembernstein} :
 \[
 \begin{split}
 \prob{\max_{1\leq t\leq T}S_t>\varepsilon}
-&\leq \exp\big(\sigma^2(e^\eta-\eta-1)-\eta\varepsilon\big)\esp{M_1}\\
-&\leq \exp\big(\sigma^2(e^\eta-\eta-1)-\eta\varepsilon\big)
+&\leq \exp\big(\sigma_T^2(e^\eta-\eta-1)-\eta\varepsilon\big)\esp{M_1}\\
+&\leq \exp\big(\sigma_T^2(e^\eta-\eta-1)-\eta\varepsilon\big)
 \end{split}
 \]
 
 La borne de droite est minimale pour
-$\eta=\ln\left(1+\frac{\varepsilon}{\sigma^2}\right)$ et donne la majoration : 
+$\eta=\ln\left(1+\frac{\varepsilon}{\sigma_T^2}\right)$ et donne la majoration
+: 
 \[
 \prob{\max_{1\leq t\leq T}S_t>\varepsilon} 
-\leq \exp\left(-\sigma^2 h\left(\frac{\varepsilon}{\sigma^2}\right)\right)
+\leq \exp\left(-\sigma_T^2 h\left(\frac{\varepsilon}{\sigma_T^2}\right)\right)
 \]
 avec $h(x)=(1+x)\ln(1+x)-x$.
 
 On conclut alors en utilisant le lemme \ref{majorationh}.
 \end{proof}
 
-\begin{cor}
+\begin{cor}\label{corbernstein}
 Soit $\delta\in[0,1]$, alors avec les mêmes
 hypothèses que précédemment et probabilité supérieure à $1-\delta$ :
 \[
-S_T\leq\frac{2K}{3}\ln\left(\frac{1}{\delta}\right)+
-\sqrt{2\sigma^2\ln\left(\frac{1}{\delta}\right)}
+S_T\leq
+\sqrt{2\sigma_T^2\ln\left(\frac{1}{\delta}\right)}
++\frac{2K}{3}\ln\left(\frac{1}{\delta}\right)
 .\]
 \end{cor}
 
@@ -571,7 +954,7 @@ avec probabilité $1-\delta$ :
 \[
 S_T\leq \frac{K}{3}\ln\left(\frac{1}{\delta}\right)+
 \sqrt{\frac{K^2}{9}\ln\left(\frac{1}{\delta}\right)^2+
-2\sigma^2\ln\left(\frac{1}{\delta}\right)}
+2\sigma_T^2\ln\left(\frac{1}{\delta}\right)}
 .\]
 
 Puis on conclut en utilisant $\sqrt{a+b}\leq\sqrt{a}+\sqrt{b}$ si $a>0$ et
@@ -582,60 +965,71 @@ On peut également appliquer l'inégalité de Bernstein d'une autre façon, quit
 à renforcer les hypothèses, pour obtenir une inégalité faisant apparaître
 directement la variance conditionnelle du processus :
 
-\begin{cor}
+\begin{cor}\label{megabernstein}
 Soit $(X_1,\ldots,X_T)$ une suite d'accroissements de
-martingale par rapport à la filtration $(\trib{F}_t)$. Supposons que $V_T\geq
-1$ et qu'il existe $K>0$ tel que :
+martingale par rapport à la filtration $(\trib{F}_t)$. Supposons que
+presque sûrement on ait : $1\leq V_T\leq \sigma_T^2$ avec $\sigma_T^2\geq 3$ et
+qu'il existe $K>0$
+tel que :
 \[
-\forall t \in \{1,\ldots,T\},\quad \left|X_t\right|\leq K,
+\forall t \in \{1,\ldots,T\},\quad X_t\leq K,
 \]
-alors, avec probabilité supérieure à $1-\delta$ et pour $T\geq 3$ :
+alors, avec probabilité supérieure à $1-\delta$ :
 \[
 \forall C\in\left]1 ;+\infty\right[,\quad
-S_T\leq\frac{2K}{3}\ln\left(\frac{\ln T}{\delta}\frac{(1+\ln C)}{\ln C}\right)+
-\sqrt{2CV_T\ln\left(\frac{\ln T}{\delta}\frac{(1+\ln C)}{\ln C}\right)}
+S_T\leq
+\sqrt{2CV_T\ln\left(\frac{\ln \sigma_T^2}{\delta}\frac{(1+\ln C)}{\ln C}\right)}
++\frac{2K}{3}\ln\left(\frac{\ln \sigma_T^2}{\delta}\frac{(1+\ln C)}{\ln
+C}\right)
 .\]
 
 En particulier, en choisissant $C=3/2$, on obtient :
 \[
-S_T\leq\frac{2K}{3}\ln\left(\frac{\ln T}{\delta/4}\right)+
-\sqrt{3V_T\ln\left(\frac{\ln T}{\delta/4}\right)}
+S_T\leq
+\sqrt{3V_T\ln\left(\frac{\ln \sigma_T^2}{\delta/4}\right)}
++\frac{2K}{3}\ln\left(\frac{\ln \sigma_T^2}{\delta/4}\right)
 .\]
 \end{cor}
 
 \begin{proof}
-Posons $N=\lceil\ln T/\ln C\rceil$
+Posons $N=\lceil\ln \sigma_T^2/\ln C\rceil$
 D'après le corollaire précédent, pour tout $r\in\{1,\ldots,N\}$ :
 \[
-\prob{S_T\geq \frac{2K}{3}\ln\left(\frac{N}{\delta}\right)+
-\sqrt{2K^2C^r\ln\left(\frac{N}{\delta}\right)}
-\ \mathrm{et}\ V_T\leq K^2C^r}\leq\frac{\delta}{N}
+\prob{S_T\geq
+\sqrt{2C^r\ln\left(\frac{N}{\delta}\right)}
++\frac{2K}{3}\ln\left(\frac{N}{\delta}\right)
+\ \mathrm{et}\ V_T\leq C^r}\leq\frac{\delta}{N}
 \]
 
 D'où :
 \[
-\prob{S_T\geq \frac{2K}{3}\ln\left(\frac{N}{\delta}\right)+
+\prob{S_T\geq
 \sqrt{2CV_T\ln\left(\frac{N}{\delta}\right)}
-\ \mathrm{et}\ K^2C^{r-1}\leq V_T\leq K^2C^r}\leq\frac{\delta}{N}
++\frac{2K}{3}\ln\left(\frac{N}{\delta}\right)
+\ \mathrm{et}\ C^{r-1}\leq V_T\leq C^r}\leq\frac{\delta}{N}
 \]
 
-Or, vu que $1\leq V_T\leq K^2T\leq K^2C^N$, en sommant l'inégalité précédente
-pour
+Or, vu que $1\leq V_T\leq \sigma_T^2\leq C^N$, en sommant l'inégalité
+précédente pour
 $r\in\{1,\ldots,N\}$, on obtient :
 \[
-\prob{S_T\geq \frac{2K}{3}\ln\left(\frac{N}{\delta}\right)+
-\sqrt{2CV_T\ln\left(\frac{N}{\delta}\right)}}\leq\delta
+\prob{S_T\geq
+\sqrt{2CV_T\ln\left(\frac{N}{\delta}\right)}
++\frac{2K}{3}\ln\left(\frac{N}{\delta}\right)
+\leq\delta}
 .
 \]
 
-On conclut en remarquant que, si $T\geq 3$ :
+On conclut en remarquant que :
 \[
-N\leq\frac{\ln T}{\ln C}+1=\ln T\frac{(1+\ln C/\ln T)}{\ln C}
-\leq \ln T\frac{(1+\ln C)}{\ln C}\qedhere
+N\leq\frac{\ln \sigma_T^2}{\ln C}+1=\ln \sigma_T^2\frac{(1+\ln C/\ln
+\sigma_T^2)}{\ln C}
+\leq \ln \sigma_T^2\frac{(1+\ln C)}{\ln C}\qedhere
 \]
 \end{proof}
 
 \subsection{Projection sur un convexe fermé}
+\label{projection}
 
 On se place dans un espace euclidien $E$. Si $x$ et $y$ sont dans $E$, on note
 $x\cdot y$ le produit scalaire entre $x$ et $y$ et $\|x\|$ la norme euclidienne
@@ -655,33 +1049,34 @@ De plus $x$ vérifie la propriété de l'angle obtus :
 \]
 
 On dit que $x$ est la projection de $y$ sur le convexe $F$, et on note
-$x=p_F(y)$.
+$x=\Pi_F(y)$.
 \end{prop}
 
-On en déduit notamment que pour tout $z\in F$, $\|y-p_F(y)\|\leq\|y-z\|$.
+On en déduit notamment que pour tout $z\in F$, $\|y-\Pi_F(y)\|\leq\|y-z\|$.
 L'inégalité de l'angle obtus permet d'obtenir une autre inégalité.
 
-\begin{cor}\label{projection}
+\begin{cor}\label{inegprojection}
 Soient $F$ une partie convexe fermée non vide de $E$ et $y\in E$. Alors :
 \[
 \forall z\in F,\quad
-\|z-p_F(y)\|\leq\|z-y\|
+\|z-\Pi_F(y)\|\leq\|z-y\|
 \]
 \end{cor}
 
 \begin{proof}
 Soit $z\in F$, on part de l'inégalité de l'angle obtus :
 \[
-(y-p_F(y))\cdot(z-p_F(y))\leq 0
+\big(y-\Pi_F(y)\big)\cdot\big(z-\Pi_F(y)\big)\leq 0
 \]
 d'où :
 \[
-(y-z+z-p_F(y))\cdot(z-p_F(y))
-=\|z-p_F(y)\|^2-(z-y)\cdot(z-p_F(y))\leq 0
+\big(y-z+z-\Pi_F(y)\big)\cdot\big(z-\Pi_F(y)\big)
+=\|z-\Pi_F(y)\|^2-(z-y)\cdot\big(z-\Pi_F(y)\big)\leq 0
 \]
 puis en appliquant l'inégalité de Cauchy-Schwarz :
 \[
-\|z-p_F(y)\|^2\leq\|z-y\|\|z-p_F(y)\|\qedhere
+\|z-\Pi_F(y)\|^2\leq\|z-y\|\|z-\Pi_F(y)\|\qedhere
 \]
 \end{proof}
+
 \end{document}
author	Thibaut Horel <thibaut.horel@gmail.com>	2010-06-22 01:16:14 +0200
committer	Thibaut Horel <thibaut.horel@gmail.com>	2010-06-22 01:16:14 +0200
commit	71b1a44313019a7c58e038d6bb6d2b0aba44ec1f (patch)
tree	3ffc0edc6b6cbb398fe79aecb9d8c2fb91a92dc9
parent	5a3533fa3f2fb02fd3cda2b546549a61b8d0407b (diff)
download	bandits-71b1a44313019a7c58e038d6bb6d2b0aba44ec1f.tar.gz