%=======================02-713 LaTeX template, following the 15-210 template==================
%
% You don't need to use LaTeX or this template, but you must turn your homework in as
% a typeset PDF somehow.
%
% How to use:
% 1. Update your information in section "A" below
% 2. Write your answers in section "B" below. Precede answers for all
% parts of a question with the command "\question{n}{desc}" where n is
% the question number and "desc" is a short, one-line description of
% the problem. There is no need to restate the problem.
% 3. If a question has multiple parts, precede the answer to part x with the
% command "\part{x}".
% 4. If a problem asks you to design an algorithm, use the commands
% \algorithm, \correctness, \runtime to precede your discussion of the
% description of the algorithm, its correctness, and its running time, respectively.
% 5. You can include graphics by using the command \includegraphics{FILENAME}
%
\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{graphicx}
\usepackage[margin=1in]{geometry}
\usepackage{fancyhdr}
% ---- Layout and problem-heading helpers ------------------------------------
\setlength{\parindent}{0pt}        % no paragraph indentation
\setlength{\parskip}{5pt plus 1pt} % small stretchable gap between paragraphs instead
\setlength{\headheight}{13.6pt}    % room for the fancyhdr header line
% \question{n}{desc}: horizontal rules around a bold "n: desc" problem header.
\newcommand\question[2]{\vspace{.25in}\hrule\textbf{#1: #2}\vspace{.5em}\hrule\vspace{.10in}}
% \part{x}: bold "(x)" sub-part marker (deliberately overrides LaTeX's \part sectioning).
\renewcommand\part[1]{\vspace{.10in}\textbf{(#1)}}
% Lead-ins for the three required pieces of an algorithm-design answer.
\newcommand\algorithm{\vspace{.10in}\textbf{Algorithm: }}
\newcommand\correctness{\vspace{.10in}\textbf{Correctness: }}
\newcommand\runtime{\vspace{.10in}\textbf{Running time: }}
\pagestyle{fancyplain}
% \NAME and \HWNUM are defined after \begin{document} (Section A); headers are
% expanded at page shipout, after those definitions, so this is safe.
\lhead{\textbf{\NAME\ }}
\chead{\textbf{Homework \HWNUM}} % use \HWNUM so the header tracks Section A instead of a hardcoded number
\rhead{COMS6998, \today}
% ---- Notation macros --------------------------------------------------------
\newcommand\eps{\epsilon}
% Bold vectors; \ensuremath lets them work in both text and math mode
% (replaces the obsolete plain-TeX {\bf x \rm} form).
\newcommand\x{\ensuremath{\mathbf{x}}}
\newcommand\y{\ensuremath{\mathbf{y}}}
% Upright "Supp" operator with correct operator spacing in math mode
% (the old bare "Supp" typeset as a product of italic letters S*u*p*p).
\newcommand\supp{\ensuremath{\operatorname{Supp}}}
\newcommand\E{\mathbb{E}}   % expectation
\newcommand\cE{\mathcal{E}} % calligraphic E, used for events
% Bold "Solution" lead-in (replaces obsolete \bf ... \rm).
\newcommand\solution{\textbf{Solution} }
% KL DIVERGENCE :
% \Div{P}{Q}: stacked divergence notation with P above a double rule above Q.
\providecommand{\Div}[2]{\mathsf{D}\left(\begin{array}{c} #1 \\ \hline \hline #2\end{array} \right)}
% \div{P}{Q}: inline KL divergence D(P || Q). Uses \middle\| for a properly
% spaced, auto-sized double bar (plain "||" typesets as two relation bars).
% NOTE: this intentionally shadows LaTeX's \div (the division sign).
\renewcommand{\div}[2]{\mathsf{D}\left( #1 \,\middle\|\, #2\right)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}\raggedright
%Section A==============Change the values below to match your information==================
\newcommand\NAME{Omri Weinstein} % your name
\newcommand\HWNUM{1} % the homework number
%Section B==============Put your answers to the questions below here=======================
% no need to restate the problem --- the graders know which problem is which,
% but replacing "The First Problem" with a short phrase will help you remember
% which problem this is when you read over your homeworks to study.
\question{1}{Problem 1 (Shearer for Mutual Information), 25 pts}
Let $X = X_1,\ldots, X_n$ be (correlated) random variables, and let $S\subset_R [n]$ s.t.\ $\Pr[i \in S] \geq \alpha$ for all $i\in [n]$.
Recall that Shearer's lemma asserts that $\E_S[H(X_S)] \geq \alpha H(X)$. This exercise examines the possibility of
extending Shearer's lemma to mutual information.
\part{a} With the above assumption, is it also true that for any random variable $T$, $\E_S[I(X_S ; T)] \geq \alpha I(X;T)$?
Prove or find a counter example. % (how large can the gap be?).
\part{b} Show that if further: $(i)$ all $X_i$'s are independent of each other
conditioned on $T$ (i.e., $I(X_i ; X_{-i} \mid T) = 0$ for every $i \in [n]$), then
$\E_S[I(X_S ; T)] \geq \alpha I(X;T)$ does hold.
% NOTE(review): the source was garbled here --- part (b) broke off mid-formula
% ("$I(X_i;X_{") and ran straight into a commented-out problem whose opening
% \iffalse was lost (only the closing \fi survived, which made the file fail to
% compile). The condition in (i) and the conclusion of (b) are reconstructed
% from context; confirm both against the original assignment.
\iffalse
% (Skipped problem, kept for reference.)
Let $V$ be a random variable on $n$ coordinates and let $\cE$ be an event.
Show that for any $\eps, \delta > 0$, there is a
subset $G\subset [n]$ of $|G| \geq n - O(\log(1/\eps)/\delta^2)$ coordinates s.t.\
$\|(V|\cE)_G - V_{G}\|_1 \leq \delta$ (where $V_G$, $(V|\cE)_G$ are the distributions projected on the
coordinates in $G$ only).
(Hint: Use the fact that conditioning on ``large'' events does not distort the distribution by much, and then apply
Pinsker's inequality).
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question{3}{Problem 3 (Entropy vs. Expectation), 25 pts}
Let $X$ be an $\mathbb{N}$-valued random variable.
%We know that the entropy of $X$ is at most the logarithm
%of $Supp(X)$, and the latter should be comparable to $\E[X]$. In this exercise we will use KL divergence to formalize this fact.
Show that $H(X) \leq O(\E[\lg X])$.
(Hint: Define the random variable $Y$ s.t.\ $\Pr[Y=i] = 1/(c\,i^2)$ for any $i\in \mathbb{N}$
using the fact that the series $\sum_{i=1}^\infty 1/i^2$ converges (to $c=\pi^2/6$). Now use nonnegativity of KL
divergence).
\
(5 pt Bonus) %Conclude that $\E[\log X]< \infty \; \Rightarrow H(X) < \infty$. Moreover,
Can you think of a counterexample when %the condition $\supp(X) \subseteq \mathbb{N}$ is necessary by giving a counterexample to (a) when
$\supp(X) \nsubseteq \mathbb{N}$?
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question{4}{Problem 4 (Statistical distance vs. Mutual Information), 20 pts}
Let $(X,M)$ be jointly distributed r.vs. Prove that
\[ \E_x\left[\|(M|x) - M\|_1\right] \leq \sqrt{(2\ln 2) I(X;M)} , \]
where $(M|x)$ denotes the distribution of $M$ conditioned on $X=x$.
Conclude that one can approximately sample the message $M=M(X)$ even without knowing $X$, so long as $X$ and $M$ are not
very correlated. (Hint: Pinsker's Inequality).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\question{5}{Problem 5 (Fooling Set vs. Rank lower bound), 30 pts}
Let $f(x,y)$ be a two-party Boolean function. Recall that a \emph{Fooling-Set} of $f$ of size $k$
is a set of input pairs $\{(x_i,y_i)\}_{i=1}^k$ such that $f(x_i,y_i) = 1$ but for any $i\neq j \in [k]$,
either $f(x_i,y_j) = 0$ or $f(x_j,y_i) = 0$. Let $FS(M_f)$ denote the size of the largest fooling set of $f$.
\
(a) Let $GT_n(x,y) = 1 \Leftrightarrow \; x\geq y$. Use the Fooling-Set technique to show that $D(GT_n) = \Omega(n)$.
\
(b) Call a Fooling-Set $S$ of $f$ ``strong'' if it has the stronger property that
$f(x_i,y_i) \equiv 1$ but for any $i\neq j \in [|S|]$,
\emph{both} $f(x_i,y_j) = 0$ \emph{and} $f(x_j,y_i) = 0$. Denote by $\overline{FS(M_f)}$ the size of the
largest strong fooling set of $f$. Prove that
\[ \mathrm{rk}(M_f) \geq \overline{FS(M_f)},\]
where $\mathrm{rk}(M_f)$ is the rank of the communication matrix $M_f$ (over the reals), recalling that $M_f(x,y) := f(x,y)$.
(Hint: What can you say about the set of rows $r_{x_1},\ldots,r_{x_{|S|}}$ in $M_f$?)
We note that this argument can be adapted to show that $\mathrm{rk}(M_f) \geq FS(M_f)/2$ for any $f$.
\end{document}
*