% AMS-TeX preamble (plain TeX based; amsppt preprint style).
\input amstex.tex
\documentstyle{amsppt}
% Print everything at 120% of the design size (10pt fonts come out at 12pt).
\magnification = 1200
% \u{...}: set the argument on a strut inside a \vtop and underline it.
\def\u#1{\hskip0pt\vtop{\hbox{\strut #1}\hrule}}
% \list{label}{text}: hanging-indent paragraph whose label is set
% flush right in a 5em box, followed by the item text.
\def\list#1#2{\noindent\hangindent 5em\hangafter1\hbox to 5em{\hfil#1
\quad}#2}
% Upright "card" for the cardinality of a set (used throughout as \card A).
\def\card{\text {card}}
% Page dimensions; "true" units are unaffected by \magnification.
\vsize=9.5truein \voffset=-.2truein
\hsize=6.0truein \hoffset=.2truein
% NOTE(review): \baselineskip=2pt is far smaller than the font height, so
% TeX's interline glue falls back on \lineskip=1pt for every line (lines
% are separated by a fixed 1pt gap rather than set on a baseline grid).
% Confirm this is intentional and not a typo for a normal value (e.g. 12pt).
\baselineskip=2pt
\lineskip=1pt \lineskiplimit=0pt
% Suppress the black overfull-box rules in the margin.
\overfullrule=0pt
\nopagenumbers
\topmatter
\author{Stanislaw J. Szarek and Michel Talagrand}\endauthor
\title{On the Convexified Sauer-Shelah Theorem }\endtitle
\thanks{AMS classification numbers: 05B99. Work partially supported by NSF grants.}\endthanks
\abstract Let $A$ be a subset of $\{0,1\}^n$. Given $\epsilon > 0$,
we can find a subset $I$ of $\{1, \dots, n\}$ such that the convex hull
in $\Bbb R^I$ of the projection of $A$ onto $\{0, 1\}^I$ contains the
cube $[1/2 - \epsilon, 1/2 + \epsilon]^I$, and that
$\card I \ge n-K \big( n \epsilon + \sqrt {n \log {2^n \over \card A}}
\big)$, where $K>0$ is a universal constant.\endabstract
\endtopmatter
\vskip 0.5in
\noindent{\bf 1. Introduction.}
\medskip
For a subset $I$ of $\{1, \dots, n\}$, let us denote by $P_I$ the natural
projection from $\{0,1\}^n$ to $\{0,1\}^I$. The Sauer-Shelah lemma
[Sa], [Sh] asserts that given a subset $A$ of $\{0,1\}^n$ and an integer
$k$ with $\card A > \underset {i \le k} \to \sum { n \choose i}$,
there exists $I \subset \{1, \dots, n\}$ with $\card I > k$ and $P_I(A)
= \{0,1\}^I$. This result has proved to be of considerable use in
analysis and probability. One drawback of this result is however the fact
that, even when $\card A \ge 2^{n-1}$, we cannot guarantee that
$\card I > n / 2$. Moreover, even if, say, $\card A / 2^n > .999$, it is not,
for large $n$, possible to guarantee that $\card I / n \ge .501$.
Yet it is at times valuable to
find subsets $I$ of $\{1, \dots, n\}$ such that $n-\card I$ is small, yet
$P_I(A)$ is ``big.'' One result of this nature and an application were discovered in [S-T],
and we introduce the necessary notation to explain this result.
For a subset $B$ of $\{0,1\}^I$, we denote by $\text {conv} B$ the convex
hull of $B$, when $B$ is seen as a subset of $\Bbb R^I$. We say that $B
\subset \{0,1\}^I$ is {\it symmetric } if
$(x_i)_{i \in I} \in B \Rightarrow (1-x_i)_{i \in I} \in B$.
\proclaim {Theorem 1.1. [S-T]} Consider a symmetric subset $A$ of $\{0,1\}^n$ with
$\card A \ge 2^{n-1}$, and consider $\epsilon > 0$. Then there exists $I
\subset \{1, \dots, n\}$ with $\card I \ge n(1-4 \epsilon)$ and
$$
\text {conv} P_I (A) \supset [1/2 - \epsilon, 1/2 + \epsilon]^I.$$
\endproclaim
It is a natural question to investigate what happens when $\card A \ll
2^{n-1}$ (or if $A$ is not symmetric). The method of [S-T], which relies on
the Sauer-Shelah lemma, apparently does not yield estimates of $\card I$
of the correct order. Obtaining such estimates is the purpose of the present paper.
An improvement of Theorem 1.1 in the symmetric case can be found in [A];
for another related sharp result see [G].
A first observation is that a straightforward push down argument reduces
the problem to the case when $A$ is hereditary, i.e.
$$
x \in A, y \in \{0,1\}^n; \forall i \le n, y_i \le x_i\Rightarrow y
\in A. $$
\proclaim{Lemma 1.2} For each subset $A$ of $\{0, 1\}^n$, there is an
hereditary subset $B$ of $\{0, 1\}^n$, with $\card B = \card A$, and such
that, for each $\epsilon \ge 0$, each $I \subset \{ 1, \dots, n\}$ we have
$$
[1/2 - \epsilon, 1/2 + \epsilon]^I \subset \text { conv } P_I (B) \Rightarrow
[1/2 - \epsilon, 1/2 + \epsilon]^I \subset \text { conv } P_I (A).$$
\endproclaim
\demo {Proof} Let us fix $j \le n$. For $x \in \{0, 1\}^n$, we denote by
$\overline x$ the sequence obtained by replacing the $j^{\text {th}}$
coordinate of $x$ by zero. We define, for $x \in A$
$$\eqalign{
T_j(x) & = \overline x \qquad \text {if } \overline x \notin A \cr
T_j(x) & = x \qquad \text {if } \overline x \in A \cr}$$
We define $T_j (A) = \{ T_j (x); x \in A\}$. We leave to the reader
the easy proof that $T_j$ is one to one on $A$, so that $\card T_j (A) =
\card A$. We also leave the reader to check that after iterating
this operation for $j = 1, \dots, n$, the resulting set is hereditary. So
it is enough to prove that
$$
[1/2 - \epsilon, 1/2 + \epsilon]^I \not \subset \text { conv } P_I (A)
\Rightarrow [1/2 - \epsilon, 1/2 + \epsilon]^I \not \subset \text { conv }
P_I \big(T_j (A)\big).$$
It should be obvious that we can assume $j \in I$. If
$$[1/2 -
\epsilon, 1/2 + \epsilon]^I \not \subset \text { conv } P_I(A),$$
we can find $(\eta_i)_{i \in I} \in\{-1, 1\}^I$ with
$$
(1/2 + \eta_i \epsilon)_{i \in I} \notin \text { conv } P_I (A),$$
\noindent and the Hahn-Banach theorem asserts that there are numbers $(\alpha_i)_{i
\in I}$ such that
$$
\forall x \in A, \, \sum_{i \in I} \alpha_i x_i < \sum_{i \in I}
\alpha_i (\frac 1 2 + \eta_i \epsilon).$$
Consider $\overline\alpha_i$ given by $\overline \alpha_i = \alpha_i$
if $i \neq j$ and $\overline \alpha_j = |\alpha_j|.$ Define $\overline
\eta_i$ similarly. We show that
$$
\forall y \in T_j(A), \, \sum_{i \in I} \overline \alpha_i y_i <
\sum_{i \in I} \overline {\alpha_i} ( \frac 1 2 + \overline \eta_i \epsilon)$$
(and this finishes the proof).
We note that $\alpha_i \eta_i \le \overline \alpha_i \overline \eta_i$ for all $i$.
Thus it suffices to find, for any $y \in T_j(A)$, an $x \in A$ such
that
$x_i = y_i$ if $i \neq j$ and
$| \alpha_j| y_j - \alpha_j x_j \le {1 \over 2} (\vert \alpha_j\vert
- \alpha_j) = \alpha_j^-$. If $y_j = 0$, we take (any)
$x$ such that $T_j(x) = y$. If $y_j=1$, we observe, using the definition
of $T_j$, that both $y$ and $\overline y$ belong to $A$, and we choose $x = y$ or $x = \overline y$ depending on whether $\alpha_j \ge 0$ or $\alpha_j < 0$.
\hfill $\square$ \enddemo
Given a number $s$, we denote by $(s)_I$ the element of $\Bbb R^I$ that
has all its coordinates equal to $s$.
\proclaim {Lemma 1.3} If $A$ is hereditary, then
$$
[s - \epsilon, s + \epsilon]^I \subset \text { conv } P_I (A)
\Leftrightarrow (s+ \epsilon)_I \in \text { conv } P_I (A).$$
\endproclaim
\demo {Proof} In fact, if $x \in \text { conv } P_I (A)$, and $0 \le y_i \le x_i$
for all $i \in I$, we have $y \in \text { conv } P_I(A)$. \hfill $\square$ \enddemo
The problem now becomes, given $r$, to find $I$ with $\card I$ as
large as possible and $(r)_I \in \text { conv } P_I(A)$.
A key ingredient of the proof will be the ability to measure the size of
a subset of $\{0,1\}^I$ not only by its cardinality or, equivalently, by
its normalised counting measure, but also by its size with respect to the
measures $\mu_{p,I} = \big( (1-p)\delta_0 + p \delta_1\big)^{\otimes I}$,
where $\delta_t$ is the unit mass concentrated at $t$.
(For $I = \{1, \dots, n\}$, we write $\mu_{p,I} = \mu_{p,n}$.) Thus it
will be natural to state a result in the line of Theorem 1.1
not only when one controls the cardinality of $A$, but when one controls
$\mu_{p,n}(A)$.
Before we state our result, it is instructive to motivate it by
considering the standard example where
$$
A =\{(x_i)_{i \le n} ; \sum_{i \le n} x_i \le q \}. $$
Here
$$
P_I(A)= \{ (x_i)_{i \in I}; \sum_{i \in I} x_i \le q\}.$$
If $\card I = n - k$ and if $(r)_I \in \text {conv} P_I (A)$, we have $r
\le q/(n-k)$, i.e. $k \ge (nr-q)/r$, so that
$$\leqalignno{
k \ge \frac {n(r-p)} r + \frac {np-q} r .&&{(1.1)}\cr}$$
We now observe, using standard estimates on the tail of the binomial
distribution, that (at least when $np/2 \le q \le np$) $\log 1/\mu_p
(A)$ is of order $(np-q)^2/np(1-p)$. Thus for $p \le r \le 2p$
(which is the interesting range) we must have
$$\leqalignno{
k \ge \frac 1 K \big( \frac {n(r-p)} p + \sqrt {\frac {n(1-p) \log 1/
\mu_p(A)} p} \big). &&{(1.2)}\cr}$$
(Of course the statement says nothing when the right hand side is greater
than or equal to $n$ which happens, in particular, if $r/p$ is large.)
The meaning of our main result is that the example presented above
is essentially the worst case.
\proclaim {Theorem 1.4} Assume $p \le 1/2$, and consider an hereditary
subset $A$ of $\{0,1\}^n$. Consider $p \le r \le 1$. Then we can find $I
\subset \{1, \dots, n \}$ with $(r)_I \in \text {conv} P_I(A)$ and
$$
n-\card I \le K\left ({n(r-p) \over p} + \sqrt {{n \over p} \log
1/\mu_p (A)}\right).$$
\endproclaim
In this statement, as well as in the rest of the paper, $K$ (with a subscript or not) denotes an effectively computable
universal (i.e. independent of $p$, $n$, $r$, $A$ or any other parameters) constant, the value of which may vary between occurrences.
\proclaim{Corollary 1.5} Consider a subset $A$ of $\{0, 1\}^n$. Then
we can find $I \subset \{1, \dots, n\}$ with
$$
[1/2 - \epsilon, 1/2 + \epsilon ]^I \subset \text { conv } P_I (A)$$
and
$$
\card I \ge n-K \big( n \epsilon + \sqrt {n \log {2^n \over \card A}}
\big).$$
\endproclaim
\noindent {\bf Comments.} 1 - Theorem 1.4 provides a value of $n- \card
I$ of the correct order. Finding the exact best possible value is of
course a more challenging question.
\noindent 2 - The reason that we assume $p \le 1/2$ is simply that the
case $p=1/2$ is by far the most interesting, and that the case $p \le
1/2$ has an identical proof. Minor modifications are required to handle
the case $p > 1/2$.
Let us now comment on the proof of Theorem 1.4. To construct $I$ we will
remove points of $\{1, \dots, n \}$ one at a time. When choosing which
point $j$ to remove first, the natural idea is to insure that $P_J(A)$
is ``large,'' where $J$ is the complement of $j$. The
first thought would be to require that $\mu_{p, J} \big ( P_J (A) \big)$
is substantially larger than $\mu_{p,n} (A)$. However, examples in
[K-K-L] show that this is impossible. Rather, we will require a control
of $\mu_{p',J} \big(P_J(A)\big)$ where $p'$ is somewhat
larger than $p$. (The reader should observe that, when $A$ is hereditary,
increasing $p$ {\underbar {decreases}} $\mu_p(A)$.) This is the object
of the main Lemma proved in Section 2. The proof of Theorem 1.4
is then completed in Section 3.
\bigskip
\noindent {\bf 2. The Main Lemma}
\proclaim{Lemma 2.1} Consider an hereditary set $A$ $\subset \{0,1\}^n$,
and for $i \le n$, set
$$
b_i=\int_A (p-x_i) d \mu_{p,n} (x) \, \big( = \sum_{x \in A} \prod^n_{j=1}
p^{x_j}(1-p)^{1-x_j} (p-x_i)\big).$$
Assume $p \le \frac 2 3$. Consider $j\le n$ such that $b_j= \max\limits_{i
\le n} b_i$. Set $J = \{1, \dots, n\} - \{j\}$. Then, for $p' = p
(1+\frac 1 {Kn})$ we have
$$
\log \mu_{p',J} \big( P_J (A) \big) \ge \log \mu_{p,n} (A) + \frac {b_j}
{2\mu_{p,n}(A)}.$$
\endproclaim
\noindent{\bf Comment.} Thus we achieve that $P_J(A)$ is larger than $A$
in the sense that $\mu_{p',J} \big(P_J(A)\big)$ is somewhat larger than
$\mu_{p,n} (A)$ for $p'=p(1+\frac 1 {Kn} )$.
Before we start the proof, we need an auxiliary result.
\proclaim {Lemma 2.2} Assume $p \le p' \le p(1+\frac 1 {3n}), p \le
\frac 2 3$. Then $\frac
1 K \mu_{p,n} \le \mu_{p', n} \le K \mu_{p,n}$. \endproclaim
\demo{Proof} Consider $x\in \{0,1\}^n$. Set $\ell = \underset {i \le n}
\to\sum x_i$, so that
$$
\mu_{p,n} (\{x\})= p^\ell (1-p)^{n- \ell}; \mu_{p',n} (\{x\})={p'}^\ell
(1-p')^{n-\ell}$$
and
$$
\frac {\mu_{p,n} (\{x\})} {\mu_{p',n} (\{x\})}= (\frac p {p'})^\ell
(\frac {1-p} {1-p'})^{n-\ell}.$$
Now
$$\eqalign{
\frac 1 {(1+\frac 1 {3n})} &\le \frac p {p'}\le 1 \cr
1&\le \frac {1-p} {1-p'} \le \frac {1 - \frac 2 3} {1- \frac 2 3 (1 + \frac 1 {3n})}
\le 1+\frac 2 n\cr}.$$
Thus
$$
\frac 1 {(1+ \frac 1 {3n})^n} \le \frac {\mu_{p,n} (\{x\})} {\mu_{p',n}
(\{x\})}\le (1+ \frac 2 n)^n .$$
\hfill $\square$\enddemo
\demo{Proof of Lemma 2.1} {\bf Step 1.} To simplify the notation,
we assume without loss of generality that $j=n$. We set $\mu = \mu_{p,n},
\tilde \mu = \mu_{p,J}$.
We set
$$\eqalign {
A^0 &= \{x=(x_i)_{i\le n-1}\in\{0,1\}^{n-1}; x^\smallfrown 0\in A\}\cr
A^1 &= \{x=(x_i)_{i\le n-1}\in\{0,1\}^{n-1}; x ^\smallfrown 1\in A\}\cr},$$
where $x^\smallfrown 0$ denotes concatenation. We note that since $A$ is
hereditary, $A^1 \subset A^0$ and $A^0=P_J (A)$.
We set $a^i= \tilde \mu (A^i)$, $i=0,1$; note that $a^0 \ge a^1$ and
$\mu(A) = (1-p) a^0 + pa^1 \le a^0$. We have
$$
\leqalignno{ b_n = (1-p) a^0 p + pa^1(p-1)
= p(1-p)(a^0-a^1). &&{(2.1)}\cr}$$
Also
$$
\tilde \mu(A^0)-\mu(A)=a^0-\mu(A)=p(a^0-a^1)$$
and thus
$$
\tilde \mu(A^0)=\mu(A)+\frac {b_n}{1-p} = \mu(A) \big( 1+\frac {b_n}
{(1-p)\mu (A)}\big).$$
Now,
$$
\frac {b_n} {\mu(A)} = \frac {p(1-p)(a^0 - a^1)} {(1-p)a^0 +pa^1} \le p.$$
Since $\log (1+x/(1-p)) \ge x$ for $0 \le x \le p < 1$, we have proved that
$$\leqalignno{
\log \tilde \mu (A^0) \ge \log \mu (A) + \frac {b_n} {\mu (A)}.
&&{(2.2)} \cr}$$
\noindent {\bf Step 2.} The difference between (2.2) and what we want is
that the left-hand side involves $\tilde \mu = \mu_{p,J}$ rather than $\mu_{p', J}$.
Increasing $p$ to $p'$ will decrease $\log \mu_{p,J} (A^0)$, and we have to
show that the resulting ``loss'' will not wipe out the ``gain'' witnessed by (2.2). For $s
\le 1$, let us set $\nu_s = \mu_{s,J}$. The key point is Margulis' formula
[M] that gives an expression for $\frac {d \nu_s} {ds} (A^0)$. Consider,
for each $i