
\documentclass[12pt]{book}
% Monday March 23, 2001 9:00 AM
\usepackage{amsthm,amsfonts,amsmath,amssymb,latexsym,epsfig}
%
\title{Linear Algebra for Math 542}
\author{JWR}
\date{Spring 2001}
%
% Conditional used as \ifanswer ... \fi around worked answers in the text.
% It is set to false here, so that material is skipped when typesetting.
\newif\ifanswer
\answerfalse
%
% Blackboard-bold shorthands.  In the text \Q, \R, \C are the rational, real
% and complex numbers and \F is the scalar field the notes work over.
\newcommand{\N}{\mathbb{N}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\F}{\mathbb{F}}
%
% \Proof{<suffix>}: starts a new unindented paragraph after a \medskip and
%   prints the italic heading "Proof<suffix>."  The macro supplies the
%   period itself; use \Proof{} for a plain "Proof.".
\newcommand{\Proof}[1]{\par\medskip\par\noindent{\it Proof#1.}}
% \QED: prints "QED" pushed to the right by \hfill, then ends the paragraph.
\newcommand{\QED}{\hfill QED\par\medskip\par}
% \jdef{<term>}: prints the term in bold and writes the same text to the index.
\newcommand{\jdef}[1]{{\bf #1}\index{#1}}
% Superscript shorthands: A\tr gives A^* and A\ctr gives A^\dagger (the text
% lists these as the transpose and the conjugate transpose).
\newcommand{\tr}{^*}
\newcommand{\ctr}{^\dagger}
% Invisible boxes as wide as a digit (\dig) and as a plus sign (\sig).
\newcommand{\dig}{\phantom{0}} % For alignment
\newcommand{\sig}{\phantom{+}} %      "
%
%
% \xy: the letter used as the indeterminate of a polynomial (currently t).
\newcommand{\xy}{t}
% Upright multi-letter names for use in math mode.
\newcommand{\Poly}{\mathrm{Poly}}
\newcommand{\Trig}{\mathrm{Trig}}
\newcommand{\Cos}{\mathrm{Cos}}
\newcommand{\Sin}{\mathrm{Sin}}
% \row, \col, \entry take a subscript, e.g. \col_j(A) and \entry_j(X).
\newcommand{\row}{\mathrm{row}}
\newcommand{\col}{\mathrm{col}}
\newcommand{\entry}{\mathrm{entry}}
\newcommand{\diag}{\mathrm{diag}}
% \IC{n}{j}: typesets I_{n,j}; the text uses it for the j-th column of the
% n-by-n identity matrix.
\newcommand{\IC}[2]{I_{#1,#2}}
% \FLAG{a}{b}: typesets E_{a,b}.
% NOTE(review): its meaning is not visible in this part of the file -- confirm.
\newcommand{\FLAG}[2]{E_{#1,#2}}
%
% Matrix maps: bold upright capitals.  The text writes \Aa for the map
% X -> AX determined by the matrix A; the other letters presumably follow
% the same pattern (TODO confirm against their uses).
\newcommand{\Aa}{\mathbf{A}}
\newcommand{\Bb}{\mathbf{B}}
\newcommand{\Hh}{\mathbf{H}}
\newcommand{\Nn}{\mathbf{N}}
\newcommand{\Pp}{\mathbf{P}}
\newcommand{\Qq}{\mathbf{Q}}
%
% Bold symbols for abstract vector spaces and their linear maps (capitals),
% for vectors (lower case) and for the zero vector.
\newcommand{\II}{\mathbf{I}}
% \newcommand{\LL}{\mathbf{L}}
\newcommand{\NN}{\mathbf{N}}
\newcommand{\TT}{\mathbf{T}}
% \SS is already defined by LaTeX, hence \renewcommand rather than \newcommand.
\renewcommand{\SS}{\mathbf{S}}
\newcommand{\UU}{\mathbf{U}}
\newcommand{\VV}{\mathbf{V}}
\newcommand{\WW}{\mathbf{W}}
\newcommand{\uu}{\mathbf{u}}
\newcommand{\vv}{\mathbf{v}}
\newcommand{\ww}{\mathbf{w}}
\newcommand{\0}{\mathbf{0}}
% Bold capital Greek: \mathbf works here because capital Greek letters are
% taken from the current math alphabet.
\newcommand{\bPhi}{\mathbf{\Phi}}
\newcommand{\bPsi}{\mathbf{\Psi}}
\newcommand{\bUpsilon}{\mathbf{\Upsilon}}
\newcommand{\bPi}{\mathbf{\Pi}}
% Bold lower-case Greek: \mathbf has no effect on these letters (they would
% print in ordinary weight), so use \boldsymbol from amsmath instead.
\newcommand{\bphi}{\boldsymbol{\phi}}
\newcommand{\bpsi}{\boldsymbol{\psi}}
\newcommand{\biota}{\boldsymbol{\iota}}
\newcommand{\bpi}{\boldsymbol{\pi}}
%
% \Mat{<cols>} ... \Rix: an array enclosed in auto-sized square brackets.
% <cols> is the array column specification, as in
%   \Mat{rr} 1 & 2 \\ 3 & 4 \Rix
\newcommand{\Mat}{\left[\begin{array}}
\newcommand{\Rix}{\end{array}\right]}
% Calligraphic letters and the upright name Span.  The text uses
% \LMAP(V,W) for the space of linear maps from V to W.
% NOTE(review): \NULLSP, \Range, \EIG, \GEIG presumably denote null space,
% range, eigenspace and generalized eigenspace, judging by the names -- confirm.
\newcommand{\NULLSP}{\mathcal{N}}
\newcommand{\Range}{\mathcal{R}}
\newcommand{\LMAP}{\mathcal{L}}
\newcommand{\Span}{\mathrm{Span}}
\newcommand{\EIG}{\mathcal{E}}
\newcommand{\GEIG}{\mathcal{G}}
%
%%%%%%%%%%%%%%%%%% ANSWERS %%%%%%%%%%%%%%%%%
% \Amark currently expands to nothing.  It is written at the head of some
% statements (e.g. directly after \begin{proposition}).
% NOTE(review): presumably a hook for marking items that have an answer -- confirm.
\newcommand{\Amark}{}
%
%
%  {lawtable} is a centered two-column table (both columns left-aligned)
%  used to list laws followed by their names.  Each row is written as
%
%     $(AB)C=A(BC)$ & (Associative Law) \\
%
%  The environment itself adds an empty row, with a little extra vertical
%  space, above and below the rows supplied by the user.
\newenvironment{lawtable}{
\begin{center}\begin{tabular}{ll} & \\*[0.01\textwidth]
}{ & \\*[0.01\textwidth]\end{tabular}\end{center}}
%
%
% \BoldHeadline{<text>}: prints <text> in bold on a centered line of its own
% and then ends the paragraph.
\newcommand{\BoldHeadline}[1]{\begin{center}
{\bf #1}
\end{center} \par
}
%
% Theorem-like environments.  `theorem' is numbered within each section and
% every other environment below shares its counter, so all statements in a
% section form a single numbered sequence.
% `cor'/`corollary' and `prop'/`proposition' are two names for the same heading.
% No \theoremstyle is selected here; the text writes \rm after
% \begin{definition}, \begin{remark} and \begin{example} to set those bodies upright.
\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{cor}[theorem]{Corollary}
\newtheorem{prop}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{question}[theorem]{Question}
%
\makeindex
\begin{document}



   \maketitle
\tableofcontents


\chapter{Preliminaries}

\section{Sets and Maps}
We assume that the reader is familiar with the language of sets
and maps.
The most important concepts are the following:


 \begin{definition}\rm
Let $V$ and $ W$ be sets and
 $ T:V\to W$ be a map between them.
The map $T$ is called \jdef{one-one}
 iff
$x_1=x_2$ whenever  $ T(x_1)=T(x_2)$.
 The map $ T$ is called \jdef{onto}
 iff
 for every  $y\in W$ there is
 an  $x\in V$ such that  $ T(x)=y$.
A map is called \jdef{one-one onto}
iff it is both one-one   and  onto.
\end{definition}

\begin{remark}\rm
Think of the equation  $y=T(x)$ as a problem to be solved for $x$.
Then:
\begin{quote}
the map $ T: V\to W$ is
$
\left\{
     \begin{array}{c}
           \mbox{ one-one  }\\
           \mbox{ onto }\\
           \mbox{ one-one  onto }
     \end{array}
\right\}
$
\end{quote}

\noindent if and only if for every  $ y\in W$ the equation

\begin{quote}
$y=T(x)$ has
$
\left\{
 \begin{array}{c}
       \mbox{ at most }\\
       \mbox{ at least }\\
       \mbox{ exactly }
 \end{array}
\right\}
$
one  solution  $ x\in V$.
\end{quote}
\end{remark}

\begin{example}\rm
The map
 $$
   \R\to \R:x\mapsto x^3
 $$
is both one-one   and  onto   since the equation
 $$
   y=x^3
 $$
possesses the unique solution $y^{\frac{1}{3}}\in\R$ for every $y\in\R$.
In contrast, the map
 $$
   \R\to \R:x\mapsto x^2
 $$
is not one-one   since the equation
 $$
   4=x^2
 $$
has {\em two} distinct solutions, namely $x=2$ and $x=-2$.
It is also not  onto   since $-4\in\R$, but the equation
 $$
   -4=x^2
 $$
has {\em no} solution $x\in\R$.
The equation $-4=x^2$
does have a complex solution $x=2i\in\C$,  but that solution is not
relevant to the question of whether the map
 $
   \R\to \R:x\mapsto x^2
 $
is  onto.
The maps
 $
   \C\to \C:x\mapsto x^2
 $
and
 $
   \R\to \R:x\mapsto x^2
 $
are different: they have a different source and target.
The map
 $
   \C\to \C:x\mapsto x^2
 $
{\em is}  onto.
\end{example}


\begin{definition}\rm
The \jdef{composition} $ T\circ  S$ of two maps
 $$
    S: U\to V,\;\;
    T:V\to  W
 $$
is the map
 $$
    T\circ  S: U\to  W
 $$
defined by
 $$
   ( T\circ S)( u) =  T( S( u))
 $$
for $ u\in U$.
    For any set $V$ the \jdef{identity map}
 $$
     I_V:V\to V
 $$
is defined by
 $$
    I_V( v)= v
 $$
for $ v\in V$.
It satisfies the identities
 $$
       I_V\circ S= S
 $$
for $ S: U\to V$ and
 $$
        T\circ I_V= T
 $$
for $ T:V\to  W$.
\end{definition}

\begin{definition}[Left Inverse]\rm
Let  $T:V\to W$.
A \jdef{left inverse} to  $ T$ is a
map  $ S: W\to V$ such that
 $$
    S\circ T =  I_V.
 $$
\end{definition}

 \begin{theorem}[Left Inverse Principle]
     A map  is one-one
if and only if
it has a left inverse.
 \end{theorem}

\Proof{} If  $ S: W \to V$ is
a left inverse to  $ T: V \to W$, then the problem  $ y=T(x)$ has at most
one solution: if  $y=T(x_1)=T(x_2)$ then
 $S(y)=S(T(x_1))=S(T(x_2))$, hence  $x_1=x_2$ since
 $S(T(x))=I_V(x)=x$. Conversely, if the problem  $ y=T(x)$ has
at most one solution, then any map  $ S: W \to V$ which
assigns to  $ y\in W$ a solution  $ x$ of  $ y=T(x)$ (when there
is one) is a left inverse to  $ T$. (It does not matter what value
 $S$ assigns to  $y$ when there is no solution  $x$.)
\QED

\begin{remark}\rm
If  $ T$ is one-one   but not  onto   the left inverse is not
unique, provided that the source of $T$ has at least two distinct
elements. This is because when $ T$ is not  onto, there is
a $y$ in the target of $ T$ which is not in the
range of $ T$. We can always make
a given left inverse $S$ into a different one by changing $ S(y)$.
\end{remark}

\begin{definition}[Right Inverse]\rm
Let  $ T: V \to W$.
A \jdef{right inverse} to  $ T$  is a
map  $ R: W \to V$ such that
 $$
   T\circ R = I_W.
 $$
\end{definition}

 \begin{theorem}[Right Inverse Principle]
    A map  is  onto
if and only if
it has a right inverse.
 \end{theorem}


 \Proof{} If  $R: W \to V$ is a right inverse to
 $ T: V \to W$, then  $ x=R(y)$ is a solution to
 $ y=T(x)$ since  $ T(R(y))=I_W(y)=y$.  In other words,
if $ T$ has a right inverse, it is  onto.
The examples
below should convince the reader of the truth of the converse.



\begin{remark}\rm
The  assertion
that there is a right inverse  $ R: W \to V$
to any  onto   map  $ T: V \to W$ may not seem
obvious to someone who thinks of a map as a computer program:
even though the problem  $ y=T(x)$ has a solution  $ x$, it
may have many, and how is a computer program to choose?
If  $ V\subseteq  \N$, one could define  $R(y)$
to be the smallest  $ x\in V$ which solves  $ y=T(x)$. But
this will not work if  $ V=\Z$;  in this case there may
not be a smallest $x$.  In fact, this converse assertion
is generally taken as an axiom, the so-called
\jdef{axiom of choice}, and can neither
be proved (Cohen showed this in 1963)
nor disproved (G\"{o}del showed this in 1939)
from the other axioms of mathematics.
It can, however, be proved in certain cases;
for example, when  $V\subseteq \N$ (we just did this).
We shall also see that
it can  be proved in the case of matrix maps,
which are the most important maps studied in these notes.
\end{remark}



\begin{remark}\rm
If  $ T$ is  onto   but not one-one, the right inverse is not
unique. Indeed, if $ T$ is not one-one, then
there will be $x_1\ne x_2$ with
$ T(x_1)=T(x_2)$. Let $y=T(x_1)$. Given a right inverse $R$
we may change its value at $y$ to produce two distinct
right inverses, one which sends $y$ to $x_1$ and another
which sends $y$ to $x_2$.
\end{remark}

\begin{definition}[Inverse]\rm
Let  $ T: V \to W$.
A \jdef{two-sided inverse} to  $ T$ is a map
$ T^{-1}: W\to V$ which is both a left
inverse to  $ T$ and a right inverse to  $ T$:
 $$
   T^{-1}\circ T=I_V,\;\;\;\;T\circ T^{-1}=I_W.
 $$
The word  \jdef{inverse} unmodified means two-sided inverse.
A map is called \jdef{invertible} iff it has a (two-sided) inverse.
\end{definition}

As the notation suggests,
the inverse $T^{-1}$  to $T$ is unique (when it exists).
The following easy proposition explains why this is so.

 \begin{theorem}[Unique Inverse Principle]
   If a map $T$ has both a left inverse
and a right inverse, then it has a two-sided inverse.
This two-sided inverse is the only
one-sided inverse to $T$.
 \end{theorem}

\Proof{} Let $S: W \to V$ be a left inverse
to $T$ and $R: W\to V$ be a right inverse.
Then
$
    S\circ T= I_V
$
 and
$
  T\circ R= I_W.
$
Compose on the right by $R$ in the first equation to obtain
$
    S\circ T\circ R= I_V\circ R
$
and use the second to obtain
$
    S\circ I_W= I_V\circ R.
$
Now composing a map with the identity (on either side)
does not change the map so we have
$
   S=R.
$
This says that $S$ ($=R$) is a two-sided inverse. Now if $S_1$ is another
left inverse to $T$, then this same argument shows that
$
     S_1=R
$
(that is, $S_1=S$). Similarly $R$ is the only right inverse to $T$.
\QED




\begin{definition}[Iteration]\rm
A map $ T: V\to V$ from a set to itself
can be iterated: for each non-negative integer $p$
define $T^p: V\to V$ by
$$
T^p=\underbrace{T\circ T\circ\cdots\circ T}_p.
$$
The iterate $T^p$ is meaningful for negative
integers $p$ as well when $ T$ is invertible. Note the formulas
$$
  T^{p+q}=T^p\circ T^q, \qquad T^0=I_V,\qquad (T^p)^q=T^{pq}.
$$
\end{definition}


\section{Matrix Theory}
 Throughout $\F$ denotes a field such as
the rational numbers $\Q$,
 the real numbers $\R$,
or the complex numbers $\C$.
We assume the reader is familiar
with the following operations from matrix theory:


\bigskip

 \begin{tabular}{ll}
 $ \F^{p\times q}\times \F^{p\times q}\to\F^{p\times q}:(X,Y)\mapsto X+Y $
    & (Addition) \\
 $ \F\times \F^{p\times q}\to\F^{p\times q}: (a,X)\mapsto aX   $
    & (Scalar Multiplication) \\
 $ 0=0_{p\times q}\in\F^{p\times q}$
    & (Zero Matrix) \\
 $ \F^{m\times n}\times \F^{n\times p}\to\F^{m\times p}: (A,B)\mapsto AB$
    & (Matrix Multiplication) \\
 $ \F^{m\times n}\to\F^{n\times m}: A\mapsto A\tr $
    & (Transpose) \\
 $ \F^{m\times n}\to\F^{n\times m}: A\mapsto A\ctr $
    & (Conjugate Transpose) \\
 $ I=I_n\in\F^{n\times n}$
    & (Identity Matrix) \\
 $ \F^{n\times n}\to \F^{n\times n}: A\mapsto A^p$
    & (Power) \\
 $ \F^{n\times n}\to \F^{n\times n}: A\mapsto f(A)$
    & (Polynomial Evaluation)
 \end{tabular}

\bigskip

We shall assume that the reader knows the following fact
which is proved by Gaussian Elimination:

 \begin{lemma}
   Suppose that $A\in\F^{m\times n}$ and $n>m$. Then
there is an $X\in\F^{n\times 1}$ with $AX=0$ but $X\ne 0$.
 \end{lemma}

The equation $AX=0$ represents a homogeneous system of
$m$ linear equations in $n$ unknowns so the lemma says
that {\it a homogeneous linear system with more unknowns
than equations possesses a non-trivial solution.}
Using this lemma we shall prove the all-important

 \begin{theorem}[Dimension Theorem]\label{PRQ-dim-thm}
  Let $A\in\F^{m\times n}$ and
  $\Aa:\F^{n\times 1}\to \F^{m\times 1}$ be
 the corresponding matrix map:
   $$
     \Aa(X)=AX
   $$
  for $X\in\F^{n\times 1}$.   Then
   \begin{description}
     \item[(1)] If $\Aa$ is one-one, then $n\le m$.
     \item[(2)] If $\Aa$ is onto, then $m\le n$.
     \item[(3)] If $\Aa$ is invertible, then $m=n$.
   \end{description}
 \end{theorem}

\Proof{ of~(1)} Assume $n>m$.
The lemma gives $X\ne 0$ with $AX=A0$
so $\Aa$ is not one-one.

\Proof{ of~(2)} Assume $m>n$.
The lemma (applied to $A\tr$) gives $H\ne 0$ with $HA=0$.
Choose $Y\in\F^{m\times 1}$ with $HY\ne 0$.
Then for  $X\in\F^{n\times 1}$ we have $H\Aa(X)=HAX=0$.
Hence $\Aa(X)\ne Y$ for all $X\in\F^{n\times 1}$
so $\Aa$ is not onto.

\Proof{ of~(3)} This follows from~(1) and~(2). \QED



\chapter{Vector Spaces}

A vector space is simply a space endowed with two operations,
{\em addition} and {\em scalar multiplication},
which satisfy the same algebraic laws as matrix addition
and scalar multiplication.
The archetypal example of a vector space is the space
$\F^{p\times q}$ of all matrices of size $p\times q$,
but there are many other examples. Another example
is the space $\Poly_n(\F)$ of all polynomials
(with coefficients from $\F$) of degree $\le n$.

  The vector space $\Poly_2(\F)$ of all polynomials $f=f(\xy)$ of
form $f(\xy)=a_0+a_1\xy+a_2\xy^2$
and the vector space $\F^{1\times 3}$ of all row matrices
$A=\Mat{lll}a_0&a_1&a_2\Rix$
are {\em not} the same:
the elements of the former space are polynomials
and the elements of the latter space  are matrices,
and a polynomial and a matrix are different things.
But there is a
correspondence between the two spaces: to specify an element
of either space is to specify three numbers: $a_0,a_1,a_2$.
This correspondence preserves the vector space operations
in the sense that
if the polynomial $f$ corresponds to the matrix $A$
and  the polynomial $g$ corresponds to the matrix $B$
then  the polynomial $f+g$ corresponds to the matrix $A+B$
and  the polynomial $bf$ corresponds to the matrix $bA$.
(This is just another way of saying that to add matrices
we add their entries and to add polynomials we add their
coefficients and similarly for multiplication by a scalar $b$.)
What this means is that calculations involving polynomials
can often be reduced to calculations involving matrices.
This is why we make the definition of {\em vector space}:
to help us understand what apparently different mathematical
objects have in common.

\section{Vector Spaces}

\begin{definition}\rm\label{VSP-def}
     A \jdef{vector space} over
\footnote{
A vector space over $\R$ is also called a \jdef{real vector space} and
a vector space over $\C$ is also called a \jdef{complex vector space}.
}
$\F$ is a set $\VV$ endowed with two
operations:

\medskip

\begin{tabular}{ll}
    addition &
        $\VV\times\VV\to\VV: (\uu,\vv)\mapsto \uu+\vv$\\
    scalar multiplication &
         $ \F\times \VV\to \VV: (a,\vv)\mapsto a\vv $
\end{tabular}

\medskip

\noindent and having a distinguished element $\0\in \VV$ (called the
\jdef{zero vector} of the vector space) and satisfying the following
axioms:

\medskip

\begin{lawtable}
  $ (\uu+\vv)+\ww=\uu+(\vv+\ww) $    & (additive associative law)       \\
  $ \uu+\vv=\vv+\uu $                & (additive commutative law)      \\
  $\uu+\0=\uu $                      & (additive identity)             \\
  $ a(\uu+\vv) = a\uu +a\vv $        & (left distributive law)         \\
  $ (a+b)\uu = a\uu +b\uu $          & (right distributive law)        \\
  $ a(b\uu)=(ab)\uu $                & (multiplicative associative law)\\
  $ 1\vv=\vv $                       & (multiplicative identity)       \\
  $ 0\vv=\0 $                        & (zero law)    \\
\end{lawtable}

\medskip

\noindent for $\uu,\vv,\ww\in \VV$ and $a,b\in \F$.
The elements of a vector space are sometimes called \jdef{vectors}.
For vectors $\uu$ and $\vv$  we introduce the abbreviations

\begin{lawtable}
  $-\uu = (-1)\uu $                    & (additive inverse) \\
  $\uu-\vv = \uu+(-\vv)$               & (subtraction) \\
\end{lawtable}
\end{definition}


A great many other algebraic laws follow from the axioms and
definitions but we shall not prove any of them.  This is because
for the vector spaces we study these laws are as obvious as
the axioms.

\begin{example}\rm  The archetypal example is:
$$
        \VV= \F^{p\times q}
$$
the space of all $p\times q$ matrices with elements from $\F$
with the operations
$$
\F^{p\times q}\times\F^{p\times q}\to\F^{p\times q}
  :(X,Y)\mapsto X+Y
$$
of matrix addition
and
$$
\F\times\F^{p\times q}\to\F^{p\times q}
  :(a,X)\mapsto aX
$$
of scalar multiplication
and zero element
$$
       \0 = 0_{p\times q}
$$
the $p\times q$ zero matrix.
\end{example}


\section{Linear Maps}
 \begin{definition}\rm
   Let $\VV$ and $\WW$ be vector spaces. A
\jdef{linear map} from $\VV$ to $\WW$ is a map
 $$
   \TT:\VV\to \WW
 $$
(defined on $\VV$ with values in $\WW$) which preserves the operations
of  addition and scalar multiplication in the sense that
$$
    \TT(\uu+\vv) = \TT(\uu) + \TT(\vv)
$$
and
$$
   \TT(a\uu) = a \TT(\uu)
$$
for $\uu,\vv\in\VV$ and $a\in\F$.
 \end{definition}

The archetypal example is given by the following

 \begin{theorem}\label{MM-I}
A  map
$
\Aa:\F^{n\times 1}\to \F^{m\times 1}
$
is linear if and only if
there is a (necessarily unique) matrix  $A\in\F^{m\times n}$
such that
 $$
    \Aa(X) = AX
  $$
for all $X\in\F^{n\times 1}$.
The linear map $\Aa$ is called the
\jdef{matrix map} determined by $A$.
 \end{theorem}

 \Proof{}
 First assume $\Aa$ is a matrix map. Then
 \begin{eqnarray*}
   \Aa(aX+bY) &=& A(aX+bY) \\
                  &=& a(AX)+b(AY) \\
                  &=& a \Aa(X) + b \Aa(Y)
 \end{eqnarray*}
where we have used the distributive law for matrix multiplication.
This proves that $\Aa$ is linear.

Assume that $\Aa$ is linear.
We must find the matrix $A$. Let $\IC{n}{j}$ be the $j$-th
column of the $n\times n$ identity matrix:
 $$
      \IC{n}{j}= \col_j(I_n)
 $$
so that
 $$
     X= x_1\IC{n}{1}+x_2\IC{n}{2}+\cdots+x_n \IC{n}{n}
 $$
for $X\in\F^{n\times 1}$ (where $x_j=\entry_j(X)$ is the
$j$-th entry of $X$).  Let $A\in\F^{m\times n}$ be the matrix whose
$j$-th column is $\Aa(\IC{n}{j})$:
 $$
      \col_j(A)= \Aa(\IC{n}{j}).
 $$
(This formula shows the uniqueness of $A$.)
Then for $X\in\F^{n\times 1}$ we have
 \begin{eqnarray*}
   \Aa(X) &=&
    \Aa(x_1\IC{n}{1}+x_2\IC{n}{2}+\cdots+x_n\IC{n}{n})\\
   &=& x_1\Aa(\IC{n}{1})+x_2\Aa(\IC{n}{2})+\cdots+x_n\Aa(\IC{n}{n}) \\
   &=& x_1\col_1(A)+x_2\col_2(A)+\cdots+x_n\col_n(A)\\
   &=&AX.
 \end{eqnarray*}
 \QED

 \begin{example}\rm
For a  given linear map $\Aa$ the proof of
Theorem~\ref{MM-I} shows how to  find the matrix $A$:
substitute in the columns $\IC{n}{k}=\col_k(I_n)$ of the
identity matrix. Here's an example.
Define $\Aa:\F^{3\times 1}\to \F^{2\times 1}$
by
 $$
   \Aa(X) = \Mat{c}3x_1+x_3\\ x_1-x_2\Rix
 $$
for $X\in\F^{3\times 1}$ where $x_j=\entry_j(X)$.
We find a matrix $A\in\F^{2\times 3}$ such that $\Aa(X)=AX$:
 $$
  \Aa\left(\Mat{r}1\\ 0 \\ 0 \Rix\right)=
  \Mat{r} 3 \\ 1\Rix,\;\;
  \Aa\left(\Mat{r}0\\ 1 \\ 0 \Rix\right)=
  \Mat{r} 0 \\ -1\Rix,\;\;
  \Aa\left(\Mat{r}0\\ 0 \\ 1 \Rix\right)=
  \Mat{r} 1 \\ 0\Rix,
  $$
so
 $
   A = \Mat{rrr}
          3 &  0 & 1\\
          1 & -1 & 0
       \Rix.
 $
 \end{example}

\begin{proposition}
The identity map $\II_\VV:\VV\to\VV$
of a vector space is linear.
\end{proposition}

 \begin{proposition}%[Composition Theorem]\Amark\ \label[COMP]
A composition of linear maps is linear.

\iffalse Assume  $\UU$, $\VV$, and $\WW$ are
vector spaces and
 $$
   \SS:\UU\to \VV,\;\;\;
   \TT:\VV\to \WW,
 $$
are linear maps.
For $\uu_1,\uu_2\in\UU$:
 \begin{eqnarray*}
     (\TT\circ \SS)(\uu_1+\uu_2)
&=&      \TT( \SS(\uu_1+\uu_2))\\
&=&      \TT( \SS(\uu_1)+\SS(\uu_2))\\
&=&      \TT( \SS(\uu_1))+\TT(\SS(\uu_2))\\
&=&      (\TT\circ\SS)(\uu_1)+(\TT\circ\SS)(\uu_2)
\end{eqnarray*}
and for  $\uu\in\UU$ and $a\in\F$:
 \begin{eqnarray*}
     (\TT\circ \SS)(a\uu)
&=&      \TT( \SS(a\uu))\\
&=&      \TT(a \SS(\uu))\\
&=&      a\TT(\SS(\uu))\\
&=&      a(\TT\circ\SS)(\uu)
\end{eqnarray*}
Hence the composition $\TT\circ \SS$ is a linear map. \QED
\fi
 \end{proposition}

\iffalse
The identity map $\II_\VV:\VV\to\VV$ is the analog of
the identity matrix in the sense that if $\VV=\F^{n\times 1}$ then
 $$
        \II_\VV(X) = I_nX
 $$
for $X\in\F^{n\times 1}$ where $I_n\in\F^{n\times n}$ is the
$n\times n$ identity matrix. (In other words, the matrix map
determined by the identity matrix is the identity map.)
\fi


 \begin{corollary}%[Iterate Theorem]
   The iterates $\TT^p$ of a linear map $\TT:\VV\to\VV$
   from a vector space to itself are linear maps.
 \end{corollary}



 \begin{definition}\rm\label{VSP-iso}
   Let $\VV$ and $\WW$ be vector spaces.
An \jdef{isomorphism}\footnote{\rm
The word {\em isomorphism} is commonly used in mathematics,
with a variety of analogous---but different---meanings. It comes
from the Greek: {\em iso} meaning {\em same} and {\em morphos}
meaning {\em structure}. The idea is that isomorphic objects should
have the same properties.
}
from $\VV$ to $\WW$ is a linear map
 $
   \TT:\VV\to \WW
 $
which is invertible.
We say that $\VV$ is \jdef{isomorphic} to $\WW$ iff
there is an isomorphism from $\VV$ to $\WW$.
 \end{definition}


 \begin{theorem}%[Linear Inverse Theorem]
\label{VSP-inverse}
  The inverse of an isomorphism  is an isomorphism.
 \end{theorem}

\Proof{} Exercise.

\iffalse
First choose $\ww_1,\ww_2\in\WW$.
Let $\vv_1=\TT^{-1}(\ww_1)$ and $\vv_2=\TT^{-1}(\ww_2)$.
Apply $\TT^{-1}$ to both sides of the equation
 $
    \TT(\vv_1+\vv_2)=\TT(\vv_1)+\TT(\vv_2)
 $
(true because $\TT$ is assumed linear) to obtain
\begin{eqnarray*}
   \TT^{-1}(\ww_1)+ \TT^{-1}(\ww_2)
&=& \vv_1+\vv_2\\
&=& \TT^{-1}(\TT(\vv_1+\vv_2))\\
&=& \TT^{-1}(\TT(\vv_1)+\TT(\vv_2)) \\
&=& \TT^{-1}(\ww_1+\ww_2).
\end{eqnarray*}
Second, choose $\ww\in\WW$ and $a\in\F$.
Let $\vv=\TT^{-1}(\ww)$.
Apply $\TT^{-1}$ to both sides of the equation
 $
    \TT(a\vv)=a\TT(\vv)
 $
(true because $\TT$ is assumed linear) to obtain
\begin{eqnarray*}
   a\TT^{-1}(\ww)
&=& a\vv \\
&=& \TT^{-1}(\TT(a\vv))\\
&=& \TT^{-1}(a\TT(\vv)) \\
&=& \TT^{-1}(a\ww).
\end{eqnarray*}
We have shown that $\TT^{-1}$ is linear as required.
\QED
\fi


\begin{proposition}%[Isomorphism Laws]
\label{iso-laws}
Isomorphisms satisfy the following properties:
\begin{description}
   \item[(identity)]
       The identity map $\II_\VV:\VV\to \VV$
       of any vector space $\VV$ is an isomorphism.
   \item[(inverse)]
      If $\TT:\VV\to \WW$ is an isomorphism,
      then so is its inverse $\TT^{-1}:\WW\to \VV$.
   \item[(composition)]
      If $\SS:\UU\to \VV$ and $\TT:\VV\to \WW$
      are isomorphisms, then so is the composition
      $\TT\circ\SS:\UU\to \WW$.
 \end{description}
\end{proposition}

\begin{corollary} Isomorphism is an equivalence relation.
This means that it satisfies the following conditions:
 \begin{description}
   \item[(reflexivity)]
       Every vector space  is isomorphic to itself.
   \item[(symmetry)]
      If $\VV$ is isomorphic to $\WW$,
      then $\WW$ is isomorphic to $\VV$.
   \item[(transitivity)]
      If $\UU$  is isomorphic to $\VV$
      and $\VV$ is isomorphic to $\WW$,
      then $\UU$ is isomorphic to $\WW$.
 \end{description}
\end{corollary}


\section{Space of Linear Maps}
Let $\VV$ and $\WW$ be vector spaces. Denote
by $\LMAP(\VV,\WW)$ the \jdef{space of linear maps}\label{LMAP}
from $\VV$ to $\WW$. Thus $\TT\in\LMAP(\VV,\WW)$ if and only if
 \begin{description}
  \item[(i)]   $\TT:\VV\to\WW$,
  \item[(ii)]  $\TT(\vv_1+\vv_2)=\TT(\vv_1)+\TT(\vv_2)$
                 for $\vv_1,\vv_2\in\VV$,
  \item[(iii)] $\TT(a\vv)=a\TT(\vv)$  for $\vv\in\VV$, $a\in\F$.
 \end{description}

 Linear operations on maps from $\VV$ to $\WW$ are
defined \jdef{point-wise}. This means:
 \begin{description}
   \item[(1)] If $\TT,\SS:\VV\to\WW$, then
       $(\TT+\SS):\VV\to \WW$ is defined by
        $$
          (\TT+\SS)(\vv)= \TT(\vv)+\SS(\vv).
        $$
    \item[(2)] If $\TT:\VV\to\WW$ and $a\in\F$, then
       $(a\TT):\VV\to\WW$ is defined by
        $$
          (a\TT)(\vv) = a\TT(\vv).
        $$
     \item[(3)] $\0:\VV\to\WW$ is defined by
        $$
          \0(\vv) = \0.
        $$
 \end{description}

 \begin{proposition}\Amark\
   These operations preserve linearity. In other words,
 \begin{description}
   \item[(1)]  $\TT,\SS\in\LMAP(\VV,\WW)\implies \TT+\SS\in\LMAP(\VV,\WW)$,
   \item[(2)] $\TT\in\LMAP(\VV,\WW),a\in\F\implies a\TT\in\LMAP(\VV,\WW)$,
   \item[(3)] $\0\in\LMAP(\VV,\WW)$.
 \end{description}
\rm (Here $\implies$ means {\em implies}.)
  \ifanswer We'll just prove~(1).
  $$
    \begin{array}{lll}
      (\TT+\SS)(a_1\vv_1+a_2\vv_2)
      &= \TT(a_1\vv_1+a_2\vv_2) + \SS(a_1\vv_1+a_2\vv_2)
      & \mbox{ def. of } \TT+\SS\\
      &= a_1\TT(\vv_1) + a_2\TT(\vv_2)+a_1\SS(\vv_1) + a_2\SS(\vv_2)
      & \mbox{ as } \TT,\SS\in\LMAP(\VV,\WW)\\
      &= a_1(\TT(\vv_1)+\SS(\vv_1))+a_2(\TT(\vv_2) + \SS(\vv_2))
      & \mbox{as $\WW$ is a vector space}\\
      &= a_1(\TT + \SS)(\vv_1)+a_2(\TT+ \SS)(\vv_2)
      & \mbox{ def. of } \TT+\SS.
    \end{array}
  $$
\fi
 \end{proposition}


 Hint for proof: For example,
  to prove~(1) assume that $\TT$ and $\SS$ satisfy~(ii) and~(iii)
  above and show that $\TT+\SS$ also does. By similar methods one
 can also prove that



 \begin{proposition}
   These operations make $\LMAP(\VV,\WW)$ a vector space.
 \end{proposition}

The last two propositions make possible the following

 \begin{corollary}\label{MM-II}
  The map
   $$
      \F^{m\times n}\to\LMAP(\F^{n\times 1},\F^{m\times 1}):
      A\mapsto \Aa
    $$
(which assigns to each matrix $A$ the matrix map $\Aa$ determined
by $A$) is an isomorphism.
 \end{corollary}


\section{Frames and Matrix Representation}
The space $\F^{n\times 1}$ of all column matrices of a given size
is the standard example of a vector space, but not the only
example. This space is well suited to calculations with the
computer since computers are good at manipulating arrays
of numbers. Now we'll introduce a device for converting
problems about vector spaces into problems in matrix theory.

 \begin{definition}\rm\label{def:frame}
A \jdef{frame} for a vector space $\VV$ is an isomorphism
    $$
        \bPhi:\F^{n\times 1}\to\VV
    $$
from the standard vector space $\F^{n\times 1}$ to the given
vector space $\VV$.
 \end{definition}


The idea is that $\bPhi$ assigns co-ordinates $X\in\F^{n\times 1}$
to a vector $\vv\in\VV$ via the equation
$$
   \vv=\bPhi(X).
$$
These co-ordinates enable us to transform problems about
vectors into problems about matrices.
The frame is a way of `naming' the vectors $\vv$;
the `names' are the column matrices $X$. The following
propositions are immediate consequences of the
Isomorphism Laws and show that there are lots of frames for a
vector space.




Let
 $
   \bPhi:\F^{n\times 1}\to \VV,
 $
be  a frame for the vector space $\VV$,
 $
   \bPsi:\F^{m\times 1}\to \WW,
 $
be  a frame for the vector space $\WW$,
and
 $
   \TT:\VV\to \WW
 $
be a linear map. These determine a linear map
 $$
   \Aa:\F^{n\times 1}\to \F^{m\times 1}
 $$
by
$$
 \Aa= \bPsi^{-1}\circ\TT\circ\bPhi. \eqno{(1)}
$$
According to Theorem~\ref{MM-I},
a linear map from $\F^{n\times 1}$ to $\F^{m\times 1}$ is a matrix map.
Thus there is a matrix $A\in\F^{m\times n}$ with
 $$
   \Aa(X)=AX \eqno{(2)}
 $$
for $X\in\F^{n\times 1}$.

 \begin{definition}[Matrix Representation]\rm
We call the matrix $A$ determined by~(1) and~(2) the
\jdef{matrix representing}  $\TT$ in the frames $\bPhi$ and $\bPsi$
and say $A$ \jdef{represents} $\TT$ in the frames $\bPhi$ and $\bPsi$.
When $\VV=\WW$ and $\bPhi=\bPsi$ we also call the matrix $A$
the \jdef{matrix representing} $\TT$ in the frame $\bPhi$ and say
that  $A$ \jdef{represents} $\TT$ in the frame $\bPhi$.
 \end{definition}

Equation~(1) says that
 $$
       \bPsi(AX)= \TT(\bPhi(X))
 $$
for $X\in\F^{n\times 1}$.
The following diagram provides a handy way of summarizing this:

\begin{center}
\setlength{\unitlength}{0.1in}
 \begin{picture}(50,18)
     % vertices
         \put(15,12){\makebox(0,0){$\VV$}}
         \put(35,12){\makebox(0,0){$\WW$}}
         \put(15,3){\makebox(0,0){$\F^{n\times 1}$}}
         \put(35,3){\makebox(0,0){$\F^{m\times 1}$}}
      % edges
         \put(17,12){\vector(1,0){15}}
         \put(15,5){\vector(0,1){5}}
         \put(17,3){\vector(1,0){15}}
         \put(35,5){\vector(0,1){5}}
      % labels
         \put(25,14){\makebox(0,0){$\TT$}}
         \put(25,5){\makebox(0,0){$\Aa$}}
         \put(13,8){\makebox(0,0){$\bPhi$}}
         \put(38,8){\makebox(0,0){$\bPsi$}}
 \end{picture}
\end{center}

Matrix representation is
used  to convert problems in linear algebra
to problems in matrix theory. The laws
in this section justify the use of
matrix representation as a computational tool.


 \begin{proposition}\label{MM-III}
 Fix frames $\bPhi:\F^{n\times 1}\to\VV$ and
 $\bPsi:\F^{m\times 1}\to\WW$ as above. Then
  the map
   $$
      \F^{m\times n}\to\LMAP(\VV,\WW):
      A\mapsto \TT=\bPsi\circ\Aa\circ\bPhi^{-1}
    $$
is an isomorphism. The inverse of this isomorphism is the
map which assigns to each linear map $\TT$ the matrix $A$
which represents $\TT$ in the frames $\bPhi$ and $\bPsi$.
 \end{proposition}

\proof{}
This isomorphism is
the composition of two isomorphisms. The first is the isomorphism
 $$
    \F^{m\times n}\to\LMAP(\F^{n\times 1},\F^{m\times 1}):A\mapsto\Aa
  $$
of Corollary~\ref{MM-II} and the second is the isomorphism
 $$
   \LMAP(\F^{n\times 1},\F^{m\times 1})\to\LMAP(\VV,\WW):
   \Aa\mapsto \bPsi\circ\Aa\circ\bPhi^{-1}.
 $$
The rest of the argument is routine. \QED

\begin{remark}\rm
The theorem asserts two kinds of linearity.
In the first place the expression
 $$
    \TT(\vv) = \bPsi\circ\Aa\circ\bPhi^{-1}(\vv)
  $$
is linear in $\vv$ for fixed $\Aa$.
This is the meaning of the assertion that $\TT\in\LMAP(\VV,\WW)$.
In the second place the expression is linear in $\Aa$ for fixed $\vv$.
This is the meaning of the assertion that the map
$A\mapsto \TT$ is linear.
\end{remark}

\begin{exercise}\rm\label{exr:is-is}
 Show that for any frame $\bPhi:\F^{n\times 1}\to\VV$ the
 identity matrix $I_n$ represents the identity transformation
 $\II_\VV:\VV\to\VV$ in the frame $\bPhi$.
\end{exercise}

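\begin{exercise}\rm\label{exr:inv-inv}
 Show that for  any frame $\bPhi:\F^{n\times 1}\to\VV$,
 if the matrix $A\in\F^{n\times n}$ represents an invertible linear map
 $\TT:\VV\to\VV$ in the frame $\bPhi$, then $A$ is invertible and
 the inverse matrix $A^{-1}$ represents the inverse transformation
 $\TT^{-1}:\VV\to\VV$ in the frame $\bPhi$.
\end{exercise}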

\begin{exercise}\rm \label{exr:prod-comp} Suppose
$$
  \bUpsilon:\F^{p\times 1}\to \UU,\;\;
  \bPhi:\F^{n\times 1}\to \VV,\;\;
  \bPsi:\F^{m\times 1}\to \WW,
$$
are frames for vector spaces $\UU$, $\VV$, $\WW$,
respectively and that
 $$
     \SS:\UU\to \VV,\;\; \TT:\VV\to\WW,
 $$
are linear maps.
Let $A\in\F^{m\times n}$ represent $\TT$ in the frames
$\bPhi$ and $\bPsi$ and
$B\in\F^{n\times p}$ represent $\SS$ in the frames
$\bUpsilon$ and $\bPhi$.
Show that the product $AB\in\F^{m\times p}$ represents the composition
   $$
     \TT\circ \SS:\UU\to \WW
   $$
in the frames $\bUpsilon$ and $\bPsi$.
(In other words composition of linear maps corresponds
to multiplication of the representing matrices.)
\end{exercise}

\begin{exercise}\rm \label{exr:iter}
 Suppose that $\TT:\VV\to\VV$ is a linear map
 from a vector space to itself,
 that $\bPhi:\F^{n\times 1}\to\VV$ is a frame,
 and that $A\in\F^{n\times n}$ represents
 $\TT$ in the frame $\bPhi$. Show that for every non-negative
 integer $p$, the power $A^p$ represents
 the iterate $\TT^p$ in the frame $\bPhi$. If $\TT$ is
 invertible (so that $A$ is invertible), then this holds for
 negative integers $p$ as well.
\end{exercise}

\begin{exercise}\rm\label{exr:eval}
Let
$$
       f(\xy) = \sum_{p=0}^m b_p \xy^p
$$
be a polynomial.   We can evaluate $f$
on a linear map $\TT:\VV\to\VV$ from a vector space to itself.
The result is the linear map $f(\TT):\VV\to\VV$ defined by
$$
       f(\TT) = \sum_{p=0}^m b_p \TT^p.
$$
Suppose that $\TT$, $\bPhi$, $A$, are as in Exercise~\ref{exr:iter}.
Show that the matrix $f(A)$ represents the map $f(\TT)$ in the frame $\bPhi$.
\end{exercise}

\begin{exercise}\rm\label{exr:dualspace}
The \jdef{dual space} of a vector
space $\VV$ is the space
$$
    \VV^*=\LMAP(\VV,\F)
$$
of linear maps with values in $\F$. Show that
the map
$$
   \F^{1\times n}\to\bigl(\F^{n\times 1}\bigr)^*:H\mapsto\Hh
$$
defined by
$$
            \Hh(X)=HX
$$
for $X\in\F^{n\times 1}$ is an isomorphism
between $\F^{1\times n}$ and the dual space
of $\F^{n\times 1}$. (We do not distinguish
$\F^{1\times 1}$ and $\F$.)
\end{exercise}

\begin{exercise}\rm \label{exr:dualmap}
A linear map $\TT:\VV\to\WW$ determines
a dual linear map $\TT^*:\WW^*\to\VV^*$
via the formula
$$
   \TT^*(\alpha)=\alpha\circ\TT
$$
for $\alpha\in\WW^*$. Suppose that $A$ is the matrix
representing $\TT$ in the frames $\bPhi:\F^{n\times 1}\to\VV$
and $\bPsi:\F^{m\times 1}\to\WW$. Find frames
$\bPhi':\F^{n\times 1}\to\VV^*$
and $\bPsi':\F^{m\times 1}\to\WW^*$
such that the matrix representing $\TT^*$ in these frames
is the transpose $A\tr$.
\end{exercise}



\section{Null Space and Range}
   Let $\VV$ and $\WW$ be vector spaces and
 $$
      \TT:\VV\to \WW
 $$
be a linear map.  The \jdef{null space} of the linear map $\TT:\VV\to\WW$
is the set $\NULLSP(\TT)$
of all vectors $\vv\in\VV$ which are mapped to $\0$ by $\TT$:
 $$
   \NULLSP(\TT) = \{\vv\in\VV: \TT(\vv)=\0\}.
 $$
(The null space is also called the \jdef{kernel} by some
authors.) The \jdef{range} of $\TT$ is the set $\Range(\TT)$ of all
vectors $\ww\in\WW$ of form $\ww=\TT(\vv)$ for some $\vv\in\VV$:
 $$
   \Range(\TT) = \{\TT(\vv) : \vv\in\VV\}.
 $$

     To decide if a vector $\vv$ is an element of the null space of $\TT$
we first check that it lies in $\VV$ (if $\vv$ fails this test
it is {\em not} in  $\NULLSP(\TT)$)
and then apply $\TT$ to $\vv$;
if we obtain $\0$ then $\vv\in\NULLSP(\TT)$,
otherwise  $\vv\notin\NULLSP(\TT)$.

     To decide if a vector $\ww$ is an element of the range of $\TT$
we first check that it lies in $\WW$ (if $\ww$ fails this test
it is {\em not} in  $\Range(\TT)$)
and then attempt to solve the equation $\ww=\TT(\vv)$ for $\vv\in\VV$.
If we obtain a solution $\vv\in\VV$, then $\ww\in\Range(\TT)$
otherwise  $\ww\notin\Range(\TT)$.
 (Warning: It is conceivable that the formula
defining $\TT(\vv)$ makes sense for certain $\vv$ which are not elements
of $\VV$; in this case the equation $\ww=\TT(\vv)$ may have a solution
$\vv$ but {\em not} a solution with $\vv\in\VV$.
If this happens $\ww\notin\Range(\TT)$.)


\begin{theorem}[One-One/NullSpace]\label{VSP-1-1}
 A linear map $\TT:\VV\to\WW$ is one-one if and only if
 $
   \NULLSP(\TT) = \{\0\}.
 $
\end{theorem}

\Proof{}
If $\NULLSP(\TT)=\{\0\}$ and $\vv_1$ and $\vv_2$
are two solutions of $\ww=\TT(\vv)$ then
$\TT(\vv_1)=\ww=\TT(\vv_2)$ so $\0=\TT(\vv_1)-\TT(\vv_2)=\TT(\vv_1-\vv_2)$
so $\vv_1-\vv_2\in\NULLSP(\TT)=\{\0\}$ so $\vv_1-\vv_2=\0$ so
$\vv_1=\vv_2$. Conversely if $\NULLSP(\TT)\ne\{\0\}$ then there
is a $\vv_1\in\NULLSP(\TT)$ with $\vv_1\ne\0$ so the equation
$\0=\TT(\vv)$ has two distinct solutions namely $\vv=\vv_1$ and $\vv=\0$.
\QED


 \begin{remark}[Onto/Range]\rm\label{VSP-onto}
  A map $\TT:\VV\to\WW$ is onto if and only if
 $
   \WW=\Range(\TT)
 $
 \end{remark}



\section{Subspaces}

\begin{definition}\rm Let $\VV$ be a vector space. A \jdef{subspace}
of $\VV$ is a subset $\WW\subseteq \VV$  which
contains the zero vector of $\VV$ and is closed under the
operations of addition and scalar multiplication, that is,
which satisfies
\begin{description}
\item[(zero)]
          $\0\in \WW$;
\item[(addition)]
         $\uu+\vv\in \WW$  whenever $\uu\in \WW$ and $\vv\in \WW$;
\item[(scalar multiplication)]
         $a\uu\in \WW$ whenever $a\in \F$ and $\uu\in \WW$;
\end{description}
\end{definition}

 \begin{remark}\rm
   If $\WW$ is a subspace of a vector space $\VV$,
then $\WW$ is a vector space in its own right: the vector space
operations are those of $\VV$. Thus any theorem about vector spaces
applies to subspaces.
 \end{remark}


 \begin{theorem}  \label{nullspace-subspace}
   The null space $\NULLSP(\TT)$
of the linear map
$\TT:\VV\to \WW$ is
a vector subspace of the vector space $\VV$.
 \end{theorem}

\Proof{}
The space $\NULLSP(\TT)$ contains the zero vector since $\TT(\0)=\0$.
If $\vv_1,\vv_2\in\NULLSP(\TT)$ then $\TT(\vv_1)=\TT(\vv_2)=\0$ so
$\TT(\vv_1+\vv_2) = \TT(\vv_1)+\TT(\vv_2)=\0+\0=\0$
so $\vv_1+\vv_2\in\NULLSP(\TT)$.
If $\vv\in\NULLSP(\TT)$ and $a\in\F$
then $\TT(a\vv)=a\TT(\vv)=a\0=\0$ so that $a\vv\in\NULLSP(\TT)$.
Hence $\NULLSP(\TT)$ is a subspace.
\QED


 \begin{theorem} \label{range-subspace}
   The range $\Range(\TT)$ of the linear map
$\TT:\VV\to \WW$ is  a  subspace of the vector space $\WW$.
 \end{theorem}

\Proof{}
The space $\Range(\TT)$ contains the zero vector since $\TT(\0)=\0$.
If $\ww_1,\ww_2\in\Range(\TT)$
then $\TT(\vv_1)=\ww_1$ and $\TT(\vv_2)=\ww_2$ for
some $\vv_1,\vv_2\in\VV$ so
$\ww_1+\ww_2=\TT(\vv_1)+\TT(\vv_2)=\TT(\vv_1+\vv_2)$
so $\ww_1+\ww_2\in\Range(\TT)$.
If $\ww\in\Range(\TT)$ and $a\in\F$
then $\ww=\TT(\vv)$ for some $\vv\in\VV$
so $a\ww=a\TT(\vv)=\TT(a\vv)$ so $a\ww\in\Range(\TT)$.
Hence $\Range(\TT)$ is a subspace.
\QED


\section{Examples}\label{vsp-ex}

\subsection{Matrices}
   The spaces $\VV=\F^{p\times q}$ are all vector spaces.
A frame $\bPhi:\F^{pq\times 1}\to\F^{p\times q}$ can be
constructed by taking the first row of $\bPhi(X)$ to
be the first $q$ entries of $X$, the second row
to be the second $q$ entries of $X$ and so on.
For example, with $p=q=2$ we get
 $$
    \bPhi\left(\Mat{c}
        x_1\\x_2\\x_3\\x_4\end{array}
    \right]\right) =
    \Mat{cc}x_1 & x_2\\ x_3 & x_4 \Rix.
  $$
In case $p=1$ and $q=n$ this frame is the transpose map
$$
\F^{n\times 1}\to\F^{1\times n}: X\mapsto X\tr.
$$
More generally, for any $p$ and $q$ the transpose map
 $$
   \F^{p\times q}\to\F^{q\times p}: X\mapsto X\tr
 $$
is an isomorphism. The inverse of
the transpose map from $\F^{p\times q}$ to $\F^{q\times p}$ is
the transpose map  from $\F^{q\times p}$ to $\F^{p\times q}$.
(Proof: $(X\tr)\tr=X$ and $(H\tr)\tr=H$.)

Suppose $P\in\F^{n\times n}$ and $Q\in\F^{m\times m}$ are invertible.
Then the maps
 \begin{eqnarray*}
  && \F^{m\times k}\to\F^{m\times k}:Y\mapsto QY \\
  && \F^{k\times n}\to\F^{k\times n}:H\mapsto HP \\
  && \F^{m\times n}\to\F^{m\times n}: A \mapsto QAP^{-1}
 \end{eqnarray*}
are all isomorphisms. The first of these has been called the
\jdef{matrix map} determined by $Q$ and denoted by $\Qq$.

 \begin{question}\rm\Amark\ What are the inverses of these isomorphisms?
 (Answer: The inverse of $Y\mapsto QY$ is $Y_1\mapsto Q^{-1}Y_1$.
  The inverse of $H\mapsto HP$ is $H_1\mapsto H_1P^{-1}$.
  The inverse of $A\mapsto QAP^{-1}$ is $B\mapsto Q^{-1}BP$.)
 \end{question}

\subsection{Polynomials}\label{poly}
   An important example is the space $\Poly_n(\F)$ of
all polynomials of degree $\le n$.  This is the space
of all functions $f:\F\to \F$ of form
 $$
   f(\xy)=c_0+c_1\xy+c_2\xy^2+\cdots+c_n\xy^n
 $$
for $\xy\in\F$. Here the coefficients $c_0,c_1,c_2,\ldots,c_n$
are chosen from $\F$.  The vector space operations
on $\Poly_n(\F)$ are defined pointwise meaning that
 $$
   (f+g)(\xy)=f(\xy)+g(\xy),\;\;\;(bf)(\xy)=b(f(\xy))
 $$
for $f,g\in\Poly_n(\F)$ and $b\in\F$.
This means that the vector space operations are
also performed `coefficientwise',  as if
the coefficients $c_0,c_1,\ldots,c_n$ were entries in a matrix:
If
 $$
   f(\xy)=c_0+c_1\xy+c_2\xy^2+\cdots+c_n\xy^n
 $$
and
 $$
   g(\xy)=b_0+b_1\xy+b_2\xy^2+\cdots+b_n\xy^n
 $$
then
 $$
  f(\xy)+g(\xy)=
(c_0+b_0)+(c_1+b_1)\xy+(c_2+b_2)\xy^2+\cdots+(c_n+b_n)\xy^n
 $$
and
 $$
   bf(\xy)= (bc_0)+(bc_1)\xy+(bc_2)\xy^2+\cdots+(bc_n)\xy^n.
 $$

 \begin{question}\rm\Amark\   Suppose $f,g\in\Poly_2(\F)$ are given by
 $$
   f(\xy)=2-6\xy+3\xy^2,\;\;\; g(\xy)=4+7\xy.
 $$
What is $5f-2g$? (Answer: $5f(\xy)-2g(\xy)=2-44\xy+15\xy^2$.)
 \end{question}


  If $n\le m$
the space $\Poly_n(\F)$ of all polynomials of degree $\le n$
is a subspace of
the space $\Poly_m(\F)$ of all polynomials of degree $\le m$:
 $$
   \Poly_n(\F)\subseteq \Poly_m(\F) \mbox{ for $n\le m$}.
 $$
A typical element $f$ of $\Poly_m(\F)$ has form
 $$
   f(\xy)=c_0+c_1\xy+c_2\xy^2+\cdots+c_m\xy^m
 $$
and $f$ is an element of the smaller space $\Poly_n(\F)$ exactly
when $c_{n+1}=c_{n+2}=\cdots=c_m=0$.
For example, $\Poly_2(\F)\subseteq\Poly_5(\F)$ since every polynomial
$f$ whose degree is $\le 2$ has degree $\le 5$.
A frame
$$
      \bPhi:\F^{(n+1)\times 1}\to \Poly_n(\F)
$$
for $\Poly_n(\F)$ is defined by
 $$
\bPhi \left(\Mat{c}
          c_0\\c_1\\ c_2 \\ \vdots \\ c_n
         \Rix\right)(\xy) =
       c_0+ c_1\xy+c_2\xy^2+\cdots+c_n\xy^n
  $$
This frame is called the \jdef{standard frame} for $\Poly_n(\F)$.
For example, with $n=2$:
 $$
   \bPhi\left(\Mat{l}
             c_0\\c_1\\c_2
           \Rix\right)(\xy)= c_0+c_1\xy+c_2 \xy^2
 $$

 \begin{remark}\rm
   Think about the notation $\bPhi(X)(\xy)$. The frame $\bPhi$
   accepts as input a matrix $X\in\F^{(n+1)\times 1}$ and produces
   as output a polynomial $\bPhi(X)$. The polynomial $\bPhi(X)$
   is itself a map which accepts as input a number $\xy\in\F$
   and produces as output a number $\bPhi(X)(\xy)\in\F$.
   The equation $\bPhi(X)=f$ might be expressed in words as
   {\em the entries of $X$ are the coefficients of $f$.}
 \end{remark}

 Any $a\in\R$ determines an isomorphism
$\TT_a:\Poly_n(\F)\to\Poly_n(\F)$ via
 $$
   \left(\TT_a(f)\right)(\xy) = f(\xy+a).
 $$
The inverse is given by
 $
    (\TT_a)^{-1}=\TT_{-a}.
 $
The composition $\TT_{-a}\circ\bPhi:\F^{(n+1)\times 1}\to\Poly_n(\F)$
of the standard frame $\bPhi$ with the isomorphism $\TT_{-a}$ is given
by
 $$
   \left(\TT_{-a}\circ\bPhi\right)(X)(\xy)=\sum_{k=0}^n b_k (\xy-a)^k
 $$
where $b_k=\entry_{k+1}(X)$. The inverse of this new frame is easily
computed using \jdef{Taylor's Identity}:
 $$
   f(\xy) = \sum_{k=0}^n \frac{f^{(k)}(a)}{k!} (\xy-a)^k
 $$
for $f\in\Poly_n(\F)$. Here $f^{(k)}(a)$ denotes the $k$-th
derivative of $f$ evaluated at $a$.



\subsection{Trigonometric Polynomials} \label{trig}
  The vector space $\Trig_n(\F)$ is the space of all functions
$f:\R\to \F$ of form
$$
 f(\xy) = a_0+\sum_{k=1}^n a_k\cos(k\xy)+b_k\sin(k\xy)
$$
for $\xy\in\R$.
Here the coefficients
$b_n,\ldots,b_2,b_1,a_0,a_1,a_2,\ldots, a_n$ are
arbitrary elements of $\F$.
This space is called the space of \jdef{trigonometric polynomials}
of degree $\le n$ with coefficients from $\F$.
The vector space operations are performed
pointwise (and hence coefficientwise) as for polynomials.
Two important  subspaces of $\Trig_n(\F)$ are
 $$
    \Cos_n(\F) = \{f\in\Trig_n(\F): f(-\xy)=f(\xy)\}
  $$
called the space of \jdef{even trigonometric polynomials}
and
 $$
    \Sin_n(\F) = \{f\in\Trig_n(\F): f(-\xy)=-f(\xy)\}
  $$
  called the space of \jdef{odd trigonometric polynomials}.
The following proposition justifies the notation.

\begin{proposition}
 \begin{description}
     \item[(1)] When $\F=\C$ the space $\Trig_n(\F)$ is the space
      of all functions of form
     $$
         f(\xy)=\sum_{k=-n}^n c_k e^{ik\xy}.
     $$
      \item[(2)] The subspace $\Cos_n(\F)$ is the space of all functions
    $g:\R\to \F$ of form
    $$
      g(\xy) = a_0+ a_1\cos(\xy)+a_2\cos(2\xy) +\cdots + a_n\cos(n\xy).
    $$
    \item[(3)] The subspace $\Sin_n(\F)$ is the space of all functions
    $h:\R\to \F$ of form
    $$   h(\xy) = b_1\sin(\xy)+b_2\sin(2\xy) +\cdots + b_n\sin(n\xy)
    $$
    for $\xy\in\R$.
 \end{description}
\end{proposition}


\noindent A frame
$$
\bPhi_{SC}:\F^{(2n+1)\times 1}\to \Trig_n(\F)
$$
for $\Trig_n(\F)$ is given by
 $$
    \bPhi_{SC}\left(\Mat{c}
      b_n\\ \vdots  \\ b_1 \\ a_0\\a_1 \\ \vdots \\ a_n
      \Rix\right)(\xy) =
       a_0+\sum_{k=1}^n a_k\cos(k\xy)+b_k\sin(k\xy).
 $$
When $\F=\C$ another frame
$$
\bPhi_E:\F^{(2n+1)\times 1}\to \Trig_n(\F)
$$
is given by
 $$
    \bPhi_E\left(\Mat{c}
      c_{-n}\\ \vdots  \\ c_{-1} \\ c_0\\c_1 \\ \vdots \\ c_n
      \Rix\right)(\xy) =
       \sum_{k=-n}^n c_k e^{ik\xy}.
 $$
A frame
$$
\bPhi_C:\F^{(n+1)\times 1}\to \Cos_n(\F)
$$
for $\Cos_n(\F)$ is given by
 $$
    \bPhi_C\left(\Mat{c}
       a_0\\a_1 \\ \vdots \\ a_n
      \Rix\right)(\xy) =
       a_0+\sum_{k=1}^n a_k\cos(k\xy).
 $$
A frame
$$
\bPhi_S:\F^{n\times 1}\to \Sin_n(\F)
$$
for $\Sin_n(\F)$ is given by
 $$
    \bPhi_S\left(\Mat{c}
      b_1 \\b_2 \\ \vdots \\ b_n
      \Rix\right)(\xy) =
       \sum_{k=1}^n b_k\sin(k\xy).
 $$



If $n\le m$ then
the space $\Sin_n(\F)$ is a subspace of $\Sin_m(\F)$,
the space $\Cos_n(\F)$ is a subspace of $\Cos_m(\F)$, and
the space $\Trig_n(\F)$ is a subspace of $\Trig_m(\F)$.



  \begin{example}\rm
  The function $f:\R\to \F$ defined by
 $$
   f(\xy)= \sin^2(\xy)
 $$
is an element of $\Cos_2(\F)$ because it can be written in the form
 $$
   f(\xy)= a_0+a_1\cos(\xy)+a_2\cos(2\xy)
 $$
(with $a_0=-a_2=1/2$, $a_1=0$) by the half angle formula
 $$
   \sin^2(\xy)= \frac{1}{2}-\frac{1}{2}\cos(2\xy)
 $$
 from trigonometry.
  \end{example}

\subsection{Derivative and Integral}
Recall from calculus the rules for differentiating
and integrating polynomials:
 $$
     f'(\xy) = a_1+2a_2\xy+3a_3\xy^2+\cdots + n a_n\xy^{n-1}
 $$
 $$
     \int_c^\xy f(\tau)\,d\tau = a_0(\xy-c)+\frac{a_1}{2}(\xy^2-c^2)+\cdots+
                             \frac{a_n}{n+1}(\xy^{n+1}-c^{n+1})
  $$
for
 $$
     f(\xy) = a_0+a_1\xy+a_2\xy^2+\cdots +  a_n\xy^n.
 $$
These operations are linear:
 $$
    (b_1f_1+b_2f_2)'(\xy) = b_1f_1'(\xy)+b_2f_2'(\xy),
  $$
 $$
      \int_c^\xy (b_1f_1(\xy)+b_2f_2(\xy))\,dt =
      b_1\int_c^\xy f_1(\xy)\,dt +
      b_2\int_c^\xy f_2(\xy)\,dt.
  $$
Hence the formulas\footnote{Changing the lower limit
in the integral from $0$ to some other number $c$ gives
a different linear map $\SS$.}
 $$
     \TT(f) = f',\;\;\SS(f)(\xy) = \int_0^\xy f(t)\,dt
 $$
define linear maps
 $$
   \TT:\Poly_n(\F)\to \Poly_{n-1}(\F),\;\;
     \SS:\Poly_n(\F)\to \Poly_{n+1}(\F)
 $$
Beginners find this a bit confusing: the maps $\TT$ and
$\SS$ accept  polynomials
as input and produce polynomials as output.
But a polynomial is (among other things) a map. Thus
$\TT$ is a map whose inputs are maps  and whose outputs
are maps.

 \begin{question}\rm\Amark\
  Is $\TT$ one-one? onto? What about $\SS$?
 (Answer: $\TT$ is not one-one since $f'=0$ if $f$ is a constant.
$\TT$ is onto since $g'=f$ if $g(\xy)=\int_0^\xy f(t)\,dt$.
$\SS$ is not onto since $\SS(f)(0)=0$ for all $f$ so we can never
solve $\SS(f)=1$ (the constant polynomial). $\SS$ is one-one since
$(\SS(f))'=f$.)
 \end{question}

\begin{remark}\rm
Recall that
the  maps $\TT_1:\VV_1\to \WW_1$ and
$\TT_2:\VV_2\to \WW_2$ are \jdef{equal} iff
$\VV_1=\VV_2$, $\WW_1=\WW_2$, and $\TT_1(\vv)=\TT_2(\vv)$ for
all $\vv\in\VV_1$.
By this definition two maps
$\TT_1:\VV_1\to \WW_1$ and $\TT_2:\VV_2\to \WW_2$
are unequal if either
the sources $\VV_1$ and $\VV_2$ are different or the targets
$\WW_1$ and $\WW_2$ are different.
For example, differentiation also determines a linear map
$\Poly_n(\F)\to \Poly_n(\F):f\mapsto f'$
and we will distinguish this from the linear map
$\Poly_n(\F)\to \Poly_{n-1}(\F):f\mapsto f'$
since the targets are different. (The latter is onto, the former is not.)

The formula $\TT(f)=f'$ can be used to define many other interesting
linear maps depending on the choice of the source and target for
$\TT$. For example, if $f\in\Sin_n(\F)$, then $f'\in\Cos_n(\F)$.
The exercises at the end of the chapter treat some examples like this.
\end{remark}


\section{Exercises} % Vector Spaces

 \begin{exercise}\rm\Amark\
  Let $g_1$ and $g_2$ be the polynomials given by
 $$
   g_1(\xy)= 6-5\xy+\xy^2,\;\;g_2(\xy)= 2 + 3\xy+4\xy^2,
 $$
and define vector spaces
 $$
   \VV_1=\F^{3\times 1},\;\;
   \VV_2=\F^{4\times 1},\;\;
   \VV_3=\Poly_2(\F),\;\;
   \VV_4=\Poly_3(\F),
 $$
and elements
 $$
   \vv_1=\Mat{r}6 \\ -5 \\ 1 \Rix,\;\;
   \vv_2=\Mat{r}1 \\ 2 \\ 4 \Rix,\;\;
   \vv_3=g_1,\;\;
   \vv_4=g_2.
 $$
For which pairs $(i,j)$ is it true that $\vv_i\in\VV_j$?
\ifanswer We have $\vv_i\notin\VV_j$ for $i=1,2$ and $j=3,4$
since a polynomial is never equal to a matrix.
For the same reason, $\vv_i\notin\VV_j$ for $i=3,4$ and $j=1,2$.
$\vv_1,\vv_2\notin\VV_2$ since matrices of different sizes
are never equal. In all other cases, $\vv_i\in\VV_j$.
In particular,  $\vv_3,\vv_4\in\VV_4$ because a polynomial
of degree $\le 2$ has degree $\le 3$.
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
  In the notation of the previous exercise define subspaces
   \begin{eqnarray*}
      \WW_1 &=&
      \left\{\Mat{c}a\\b\\c\Rix: 6a-5b+c=0\right\}\\
      \WW_2 &=& \{f\in\VV_3: f(2)=0\}\\
      \WW_3 &=& \{f\in\VV_3: f(1)=f(2)=0\}\\
      \WW_4 &=& \{f\in\VV_4: f(1)=f(2)=0\}
   \end{eqnarray*}
When is $\vv_i\in\WW_j$?
\ifanswer The previous exercise gave
$\vv_3,\vv_4\notin\VV_1$ and $\vv_1,\vv_2\notin\VV_4$.
Also $\WW_1\subseteq\VV_1$ and $\WW_2,\WW_3,\WW_4\subseteq\VV_4$.
Hence $\vv_3,\vv_4\notin\WW_1$ and $\vv_1,\vv_2\notin\WW_j$ for $j=2,3,4$.
Furthermore
 \begin{tabular}{ll}
$\vv_1\notin\WW_1$       & since $6(6)-5(-5)+(1)\ne0$,\\
$\vv_2\in\WW_1$          & since $6(1)-5(2)+(4)=0$,\\
$\vv_3\in\WW_2$          & since $g_1(2)=0$,\\
$\vv_4\notin\WW_2$       & since $g_2(2)=24\ne 0$,\\
$\vv_3\notin\WW_3,\WW_4$ & since $g_1(1)\ne0$,\\
$\vv_4\notin\WW_3,\WW_4$ & since $g_2(1)\ne0$.
 \end{tabular}
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
  In the notation of the previous exercise  which of
  the set inclusions $\WW_i\subseteq\WW_j$ are true?
  \ifanswer $\WW_3\subseteq\WW_4$ and $\WW_3\subseteq\WW_2$.
   All others are false.
\fi
 \end{exercise}


Let us distinguish truth and nonsense.
Only a meaningful equation  can be true or false.
An equation  is nonsense if it contains some notation
(like $0/0$) which has not been defined or if it equates
two objects of different types such as a polynomial and a matrix.
Mathematicians thus distinguish two levels of error.
The equation $2+2=5$ is false, but at least meaningful.
The equation
 $$
    3+\Mat{rr}4 & 0\Rix =7
    \mbox{ (nonsense)}
  $$
 is meaningless  - {\em neither true nor false} - since
 we have not defined how to add a number to a $1\times 2$ matrix.
 Philosophers sometimes call an error like this a  \jdef{category error}.
 Another sort of category error is illustrated by the equation
 $$
    f=   \Mat{rrr} a & b & c\Rix
    \mbox{ (nonsense) }
 $$
  where $f(\xy)= a+b\xy+c\xy^2$.

   \begin{exercise}\rm\Amark\
     Continue the notation  of the previous exercise and define a map
     $$
       \TT:\F^{3\times 1}\to\Poly_2(\F)
     $$
    by
   $$
      \TT\left(\Mat{c}a\\ b\\ c\Rix\right)(\xy)=
       a+b\xy+c\xy^2.
   $$
  Which of the equations $\TT(\vv_i)=\vv_j$ are meaningful?
  Which of the equations $\TT(\WW_i)=\WW_j$ are meaningful?
  Of the meaningful ones which are true?
\ifanswer $\TT(\vv_i)=\vv_j$ is meaningful iff $i=1,2$
  and $\TT(\WW_i)=\WW_j$ is meaningful iff $i=1$.
  Of the meaningful equations, $\TT(\vv_1)=\vv_3$
  and $\TT(\WW_1)=\WW_2$, but the others are (meaningful but) false.
  The equation $\TT(\WW_1)=\WW_1$ contains an implicit category error
  since $\WW_1$ is a set of $3\times 1$ matrices whereas
  $\TT(\WW_1)$ is a set of polynomials.
\fi
   \end{exercise}

\begin{exercise}\rm
Define $\Aa:\F^{2\times 1}\to \F^{2\times 1}$
by
 $$
   \Aa\left(\left[ \begin{array}{c}
     x_1\\x_2
   \Rix\right)=
\left[ \begin{array}{c}
        5x_1+4x_2 \\ 3x_2
       \Rix.
 $$
Find the matrix $A$ such that $\Aa(X)=AX$.
\end{exercise}


 \begin{exercise}\rm
   Prove that a map
 $$
   \TT:\F^{1\times m}\to \F^{1\times n}
 $$
is a linear map if and only if there is a
(necessarily unique) matrix $A\in\F^{m\times n}$ such that
 $$
     \TT(H) = H A
 $$
for all $H\in\F^{1\times m}$.
 \end{exercise}

 \begin{exercise}\rm\Amark\
 For which of the following pairs $\VV$, $\WW$ of vector spaces
does the formula $\TT(f)=f'$ define a linear map
$
  \TT:\VV\to \WW
$
with source $\VV$ and target $\WW$?
$$
 \begin{array}{lll}
   (1) &  \VV=\Poly_3(\F), & \WW=\Poly_5(\F).\\
   (2) &  \VV=\Poly_3(\F), & \WW=\Poly_2(\F).\\
   (3) &  \VV=\Cos_3(\F),  & \WW=\Sin_3(\F).\\
   (4) &  \VV=\Sin_3(\F),  & \WW=\Cos_3(\F).\\
   (5) &  \VV=\Cos_3(\F),  & \WW=\Trig_3(\F).\\
   (6) &  \VV=\Trig_3(\F), & \WW=\Cos_3(\F).\\
   (7) &  \VV=\Poly_3(\F), & \WW=\Cos_3(\F).
 \end{array}
$$
\ifanswer  If for some $f\in\VV$ we have $f'\not\in\WW$
then the formula $\TT(f)=f'$ does not determine
a map with source $\VV$ and target $\WW$.
Thus only~(6) and~(7) are not legal maps.
\fi
 \end{exercise}

 \begin{exercise}\rm \label{frx-1}
 In each of the following you are given
 vector spaces $\VV$ and $\WW$, frames
 $\bPhi:\F^{n\times 1}\to\VV$ and $\bPsi:\F^{m\times 1}\to\WW$,
a linear map $\TT:\VV\to\WW$ and a matrix $A\in\F^{m\times n}$.
Verify that the matrix $A$  represents the map $\TT$ in the frames
$\bPhi$ and $\bPsi$ by proving the identity
  $
       \bPsi(AX)=\TT(\bPhi(X)).
   $
 \begin{description}
  \item[(1)] $\VV=\Poly_2(\F)$, $\WW=\Poly_1(\F)$,
  $\bPhi(X)(\xy)=x_1+x_2\xy+x_3\xy^2$,
  $\bPsi(Y)(\xy)=y_1+y_2\xy$, $\TT(f)=f'$,
  $$
   A=\Mat{rrr} 0 & 1 & 0 \\ 0 & 0 & 2 \Rix.
  $$
  \item[(2)] $\VV$, $\WW$, $\bPhi$, $\bPsi$ as in~(1),
      $\TT(f)(\xy)= (f(\xy+h)-f(\xy))/h$,
  $$
   A=\Mat{rrr} 0 & 1 & h \\ 0 & 0 & 2 \Rix.
  $$
  \item[(3)] $\VV=\Cos_2(\F)$, $\WW=\Sin_2(\F)$,
  $\bPhi(X)(\xy)=x_1+x_2\cos(\xy)+x_3\cos(2\xy)$,
  $\bPsi(Y)(\xy)=y_1\sin(\xy)+y_2\sin(2\xy)$, $\TT(f)=f'$,
  $$
   A=\Mat{rrr} 0 & -1 & 0 \\ 0 & 0 & -2 \Rix.
  $$
  \item[(4)] $\VV$ and  $\bPhi$  as in~(1),
        $\WW=\F^{1\times 3}$, $\bPsi(Y)=Y\tr$,
  $$
\TT(f)= \Mat{rrr}f(0)&f(1)&f(2)\Rix,
 \qquad
   A=\Mat{rrr}
           1 & 0 & 0 \\
           1 & 1 & 1 \\
           1 & 2 & 4
        \Rix.
  $$
  \end{description}
    Here $x_j=\entry_j(X)$ and $y_i=\entry_i(Y)$.
 \end{exercise}


 \begin{exercise}\rm \label{frx-2}
 In each of the following you are given
 a vector space $\VV$,
 a frame $\bPhi:\F^{n\times 1}\to\VV$,
 a linear map $\TT:\VV\to\VV$ from $\VV$ to itself,
 and a matrix $A\in\F^{n\times n}$.
Verify that the matrix $A$  represents the map $\TT$ in the frame
$\bPhi$  by proving the identity
  $
       \bPhi(AX)=\TT(\bPhi(X)).
   $
 \begin{description}
  \item[(1)] $\VV=\Poly_2(\F)$,
             $\bPhi(X)(\xy)=x_1+x_2\xy+x_3\xy^2$,
             $\TT(f)=f'$,
  $$
   A=\Mat{rrr} 0 & 1 & 0 \\ 0 & 0 & 2 \\ 0 & 0 & 0
         \Rix.
  $$
  \item[(2)] $\VV$ and $\bPhi$ as in~(1),
             $\TT(f)(\xy)= (f(\xy+h)-f(\xy))/h$,
  $$
   A=\Mat{rrr} 0 & 1 & h \\ 0 & 0 & 2 \\ 0 & 0 & 0
   \Rix.
  $$
  \item[(3)] $\VV=\Trig_1(\F)$,
             $\bPhi(X)(\xy)=x_1+x_2\cos(\xy)+x_3\sin(\xy)$,
             $\TT(f)=f'$,
  $$
   A=\Mat{rrr} 0 & 0 & 0 \\ 0 & 0 & 1 \\ 0 & -1 & 0
        \Rix.
  $$
  \item[(4)] $\VV$ and  $\bPhi$  as in~(3),
             $\TT(f)(\xy)= (f(\xy+h)-f(\xy))/h$,
  $$
   A=\Mat{ccc}
           0 & 0                  & 0 \\
           0 & -h^{-1}(1-\cos h)  & h^{-1}\sin h \\
           0 & -h^{-1}\sin h      & -h^{-1}(1-\cos h)
        \Rix.
  $$
  \end{description}
   Here $x_j=\entry_j(X)$.
 \end{exercise}


 \begin{exercise}\rm
    Which of the following linear maps
$\TT:\VV\to \WW$ is one-one? onto?
 \begin{enumerate}
   \item $\TT:\Poly_3(\F)\to \Poly_2(\F):\TT(f)=f'$.
   \item $\TT:\Poly_3(\F)\to \Poly_3(\F):\TT(f)=f'$.
   \item $\TT:\Poly_2(\F)\to \Poly_3(\F):\TT(f)=\int f$.
   \item $\TT:\Poly_2(\F)\to \Poly_4(\F):\TT(f)=\int f$.
   \item $\TT:\Sin_3(\F)\to \Cos_3(\F):\TT(f)=f'$.
   \item $\TT:\Cos_3(\F)\to \Sin_3(\F):\TT(f)=f'$.
   \item $\TT:\Sin_3(\F)\to \Cos_3(\F):\TT(f)=\int f$.
 \end{enumerate}
Here $f'$ denotes the derivative of $f$ and
$\int f$ stands for the function $F$ defined by
 $$
   F(\xy)=\int_0^\xy f(\tau)\,d\tau.
 $$
(If the map is not one-one find a non-zero $f$ with $\TT(f)=\0$.
If the map is not onto find a  $g$ with $\TT(f)\ne g$ for all $f$.
If the map is one-one find a left inverse.
If the map is onto find a right inverse.)
\end{exercise}

 \begin{question}\rm\Amark\
Conspicuously absent from the list of linear maps
in the last problem is a map
$\Cos_3(\F)\to \Sin_3(\F):\TT(f)=\int f$.
Why?
 (Answer: The constant function $f(\xy)=1$ is in the space $\Cos_3(\F)$
but its integral $F(\xy)=\xy$ is not in the space $\Sin_3(\F)$.)
 \end{question}



 \begin{exercise}\rm\Amark\
  The map $\TT:\Poly_3(\F)\to \Poly_3(\F)$
defined by
 $$
   \TT(f)(\xy) = f(\xy+2)
 $$
is an isomorphism. What is $\TT^{-1}$?
\ifanswer $\TT^{-1}(f)(\xy)=f(\xy-2)$.
\fi
 \end{exercise}

\begin{exercise}\rm
Let
  $
       A=\Mat{rrrr}
         1 & 1 & 1 & 1 \\
         1 & -1 & 1 & -1
       \Rix
   $
and let $\Aa:\F^{4\times 1}\to\F^{2\times 1}$
be the corresponding linear map.
Find a frame $\bPhi:\F^{2\times 1}\to\NULLSP(\Aa)$.
\end{exercise}

 \begin{exercise}\rm\Amark\
  Let
 $
   \VV = \{ f\in \Poly_3(\F): f(1)=f(-1)=0\}.
 $
Find a frame $\bPhi:\F^{2\times 1}\to\VV$.
Hint: This problem is a little bit like the
preceding one.
\ifanswer %
 $$
 \bPhi\left(\Mat{c}z_1\\z_2\Rix\right)(\xy)=
    z_1+z_2\xy-z_1\xy^2-z_2\xy^3.
  $$
\fi
 \end{exercise}

 \begin{exercise}\rm Show that the map
 $$
   \Poly_n(\F)\to\F^{1\times 3}:
   f\mapsto \Mat{ccc}f(0) & f(1) & f(2) \Rix
 $$
is  one-one for $n\le 2$  and onto for $n\ge 2$.
Show that it is not one-one for $n>2$ and not onto for $n=1$.
 \end{exercise}

 \begin{exercise}\rm
 Let
  $$
     \VV=\{ f\in\Poly_n(\F): f(0)=0\}
   $$
  and define $\TT:\VV\to\Poly_{n-1}(\F)$ by $\TT(f)=f'$.
  Show that $\TT$ is an isomorphism and find its inverse.
 \end{exercise}

 \begin{exercise}\rm Show that the map
 $$
   \Poly_n(\F)\to\Poly_n(\F) : f\mapsto F
 $$
where
 $$
   F(\xy) = \xy^{-1}\int_0^\xy f(t)\,dt
 $$
is an isomorphism. What is its inverse?
 \end{exercise}

 \begin{exercise} \rm
 For each of the following four spaces $\VV$ the formula
  $$
     \TT(f)=f''
   $$
  defines a linear map $\TT:\VV\to\VV$ from $\VV$ to itself.
   \begin{description}
    \item[(1)] $\VV=\Poly_3(\F)$
    \item[(2)] $\VV=\Trig_3(\F)$
    \item[(3)] $\VV=\Cos_3(\F)$
    \item[(4)] $\VV=\Sin_3(\F)$
   \end{description}
In which of these four cases is $\TT$ invertible?
In which of these four cases is $\TT^4=\0$?
 \end{exercise}



\chapter{Bases and Frames}

In this chapter we relate the notion
of  frame
to the notion of basis as explained in the first course in linear algebra.
The two notions are essentially the same (if you look at them right).


\section{Maps and Sequences}
Let $\VV$ be a vector space,
$\bPhi:\F^{n\times 1}\to\VV$ be a linear map,
and $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ be a sequence
of elements of $\VV$. We say that
the linear map $\bPhi$ and the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
\jdef{correspond} iff
 $$
    \bphi_j=\bPhi(\IC{n}{j}) \eqno{(1)}
 $$
 for $j=1,2,\ldots,n$ where
$\IC{n}{j}=\col_j(I_n)$ is the $j$-th  column of the identity matrix.



 \begin{theorem}\label{vspace-ass}
A linear map $\bPhi$ and a sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
correspond iff
$$
    \bPhi(X)=x_1\bphi_1+x_2\bphi_2+\cdots+ x_n\bphi_n \eqno{(2)}
$$
for all $X\in\F^{n\times 1}$.  Here $x_j=\entry_j(X)$.
Hence, every sequence corresponds to a unique linear map.
 \end{theorem}

\proof{} Exercise. (Read the rest of this section first.)

 \begin{question}\rm Why is the map $\bPhi$ defined by~(2) linear?
 (Answer:
$ \bPhi(aX+bY)
    = \sum_j (ax_j+by_j)\bphi_j
    = a\left(\sum_jx_j\bphi_j\right)+b\left(\sum_jy_j\bphi_j\right)
    = a\bPhi(X)+b\bPhi(Y)$.)
 \end{question}

\begin{theorem}
Let $\VV^n$ denote the set of sequences of length $n$ from
the vector space $\VV$, and
$\LMAP(\F^{n\times 1},\VV)$ denote the set of linear maps
from $\F^{n\times 1}$ to $\VV$.
Then the map
$$
  \LMAP(\F^{n\times 1},\VV)\to\VV^n:
\bPhi\mapsto (\bPhi(\IC{n}{1}), \bPhi(\IC{n}{2}),\ldots,\bPhi(\IC{n}{n}))
$$
is one-one and onto.
\end{theorem}

\proof{} Exercise.

\begin{remark}\rm
Thus
the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
and the corresponding linear map $\bPhi$
carry the same information: each determines the other uniquely.
We will distinguish them carefully for
they are set-theoretically distinct.
The sequence  is an operation which
accepts as input an integer $j$ between $1$ and $n$ and
produces as output an element $\bphi_j$ in the vector space $\VV$.
The linear map is an operation which
accepts as input an element $X$ of the vector space $\F^{n\times 1}$
and produces as output an element $\bPhi(X)$ in the vector space $\VV$.
\end{remark}



\begin{example}\rm
In the special case $n=2$
 $$
    X= \Mat{l} x_1 \\ x_2 \Rix=
    x_1\Mat{l} 1 \\ 0 \Rix+
    x_2\Mat{l} 0 \\ 1 \Rix
   =x_1\IC{2}{1}+x_2\IC{2}{2}
$$
so equation~(1) is
 $$
 \bphi_1=\bPhi\left(\Mat{l} 1 \\ 0 \Rix\right),
 \;\;
 \bphi_2=\bPhi\left(\Mat{l} 0 \\ 1 \Rix\right),
 $$
and equation~(2) is
 $$
    \bPhi\left(\Mat{l}x_1\\x_2\Rix\right)
= x_1\bphi_1+x_2\bphi_2
 $$
\end{example}

\begin{example}\rm\label{mm-ex}
Suppose $\VV=\F^{m\times 1}$
and form the matrix $A\in\F^{m\times n}$
with columns $\bphi_1,\bphi_2,\ldots,\bphi_n$:
 $$
  \bphi_j= \col_j(A)
 $$
for $j=1,2,\ldots,n$. Now
 $$
    AX= x_1\bphi_1+x_2\bphi_2+\cdots+x_n\bphi_n
  $$
where $x_j=\entry_j(X)$. This says that $\bPhi(X)=AX$.
Hence (in this special case) the map $\bPhi$ goes by two names:
it is the  {\it map corresponding to the sequence}
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$
and
it is the {\it matrix map determined by the matrix} $A$.
{\em Remember that this is a special case; the
map corresponding to a sequence
is a matrix map only when  $\VV=\F^{m\times 1}$}.
\end{example}

\begin{example}\rm
Suppose $\VV=\F^{1\times m}$ and that
 $$
  \bphi_i= \row_i(B), \qquad i=1,2,\ldots,n
 $$
are the rows of $B\in\F^{n\times m}$.
Then
the map $\bPhi$ is given by
 $$
   \bPhi(X)=X\tr B
 $$
where $X\tr$ is the transpose of $X$.
\end{example}

 \begin{example}\rm
   Recall that $\Poly_n(\F)$ is the space of polynomials
 $$
   f(\xy)=x_0+x_1\xy+x_2\xy^2+\cdots+x_n\xy^n
 $$
of degree $\le n$ with coefficients from $\F$.
For $k=0,1,2,\ldots,n$ define $\bphi_k\in\Poly_n(\F)$ by
 $$
   \bphi_k(\xy)=\xy^k.
 $$
Then the corresponding map
$$
      \bPhi:\F^{(n+1)\times 1}\to \Poly_n(\F)
$$
is defined by $\bPhi(X)=f$ where the coefficients of $f$ are the
entries of $X$: $x_k=\entry_{k+1}(X)$ for $k=0,1,2,\ldots,n$.
For example, with $n=2$:
 $$
   \bPhi\left(\Mat{l}
             x_0\\x_1\\x_2
           \Rix\right)(\xy)= x_0+x_1\xy+x_2 \xy^2
 $$
 \end{example}


\section{Independence}

 \begin{definition}\rm
The sequence  $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
is (linearly) \jdef{independent}
iff the only solution
$x_1,x_2,\ldots,x_n\in\F$
of
$$
     x_1\bphi_1+x_2\bphi_2+\cdots+x_n\bphi_n=\0 \eqno{(\clubsuit)}
$$
is the trivial solution $x_1=x_2=\cdots=x_n=0$.
The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is called
\jdef{dependent} iff it is not  independent,
 that is, iff equation~$(\clubsuit)$ possesses a non-trivial solution,
 (i.e. one with at least one $x_i\ne 0$).
 \end{definition}

\begin{remark}\rm It is easy to confuse the words
{\em independent} and {\em dependent}. It helps to remember
the etymology.  Equation~$(\clubsuit)$ asserts a relation
among the elements of the sequence. Thus the
sequence is {\em dependent} when its elements satisfy a non-trivial
relation. Note also that we have worded the definition in terms
of a {\em sequence} of vectors rather than a {\em set}: repetitions
are relevant. Thus the sequence $(\bphi_1,\bphi_1,\bphi_2)$ is dependent, since
$x_1\bphi_1+x_2\bphi_1+x_3\bphi_2=0$ for $x_1=1$, $x_2=-1$, and $x_3=0$.
\end{remark}

\begin{question}\rm\Amark\ Is the sequence $(\bphi_1,\bphi_2)$ dependent
if $\bphi_2=\0$?
 (Answer: Yes, because then $0\bphi_1+1 \bphi_2=0$).
\end{question}





 \begin{theorem}[One-One/Independence] \sloppy
\label{independent=one-one}
Let $(\bphi_1,\ldots,\bphi_n)$
be a sequence of vectors in the vector space $\VV$ and
$\bPhi:\F^{n\times 1}\to \VV$ be the corresponding map $\bPhi$.
Then the following are equivalent:
 \begin{description}
   \item[(1)] The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is
              independent.
   \item[(2)] The corresponding map $\bPhi$    is one-one.
   \item[(3)] The null space of the corresponding
              linear map  consists only of the zero vector:
              $$
                   \NULLSP(\bPhi)=\{\0\}.
              $$
 \end{description}
 \end{theorem}

\Proof{}
By the definition of $\bPhi$
we can write equation~$(\clubsuit)$ in the form
 $$
   \bPhi(X)=\0 \; \mbox{ where } \;
   X=\Mat{c} x_1\\ x_2\\ \vdots\; \\ x_n\Rix.
 $$
To say that the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is
independent is to say that the only solution of $\bPhi(X)=\0$
is $X=0$; hence parts~(1) and~(3) are equivalent.
According to Theorem~\ref{VSP-1-1}
parts~(2) and~(3) are equivalent.
\QED

 \begin{example}\rm
 For $A\in\F^{m\times n}$ let $A_j=\col_j(A)\in\F^{m\times 1}$ be the
$j$-th column of $A$ and $x_j=\entry_j(X)$ be the
$j$-th entry of $X\in\F^{n\times 1}$. Then
 $$
   AX=x_1 A_1+x_2A_2+\cdots+x_nA_n.
 $$
Hence {\em the columns of $A$ are  independent
if and only if the only solution of the homogeneous system
$AX=0$ is $X=0$.}
\end{example}

 \begin{example}\rm
    Similarly, the rows of $A$ are
   independent if and only if the only solution of the dual
  homogeneous system  $HA=0$ is $H=0$.
 \end{example}




\section{Span}

\begin{definition}\rm
Let $\VV$ be a vector space and $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
be a sequence of vectors from $\VV$.
The sequence  \jdef{spans}
$\VV$ if and only if every element $\vv$ of $\VV$ is expressible as a
linear combination of $(\bphi_1,\bphi_2,\ldots,\bphi_n)$,
that is,
for every $\vv\in\VV$ there exist scalars $x_1,x_2,\ldots,x_n$ such that
$$
   \vv =  x_1\bphi_1+x_2\bphi_2+\cdots+x_n\bphi_n. \eqno{(\diamondsuit)}
$$
\end{definition}

 \begin{theorem}[Onto/Spanning]
\label{span-onto}
Let $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
be a sequence of vectors from the vector space $\VV$ and
$\bPhi:\F^{n\times 1}\to \VV$ be the corresponding map $\bPhi$.
Then the following are equivalent:
 \begin{description}
   \item[(1)] The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
              spans the vector space $\VV$.
   \item[(2)] The corresponding map
              $\bPhi:\F^{n\times 1}\to \VV$ is onto.
   \item[(3)] $\Range(\bPhi)=\VV$.
 \end{description}
 \end{theorem}

\Proof{}
By the definition of $\bPhi$
we can write equation~$(\diamondsuit)$ in the form
 $$
  \vv= \bPhi(X) \; \mbox{ where } \;
   X=\Mat{c} x_1\\ x_2\\ \vdots\; \\ x_n\Rix.
 $$
To say that the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ spans
is to say that there is a solution of $\vv=\bPhi(X)$
no matter what $\vv\in\VV$ is; hence parts~(1) and~(2) are equivalent.
Parts~(2) and~(3) are trivially equivalent for the range $\Range(\bPhi)$
of $\bPhi$ is by definition the set of all vectors $\vv$ of form
$\vv=\bPhi(X)$. (See Remark~\ref{VSP-onto}.)
\QED

 \begin{example}\rm
 For $A\in\F^{m\times n}$ let $A_j=\col_j(A)\in\F^{m\times 1}$ be the
$j$-th column of $A$ and $x_j=\entry_j(X)$ be the
$j$-th entry of $X\in\F^{n\times 1}$. Then
 $$
   AX=x_1 A_1+x_2A_2+\cdots+x_nA_n.
 $$
Hence {\em the columns of $A$ span the vector space $\F^{m\times 1}$
if and only if for every column $Y\in\F^{m\times 1}$ the inhomogeneous system
$Y=AX$ has a solution $X$.}
\end{example}

 \begin{example}\rm
 Similarly, the rows of $A$  span $\F^{1\times n}$
 if and only if for every row $K\in\F^{1\times n}$ the dual
  inhomogeneous system  $K=HA$ has a solution $H\in\F^{1\times m}$.
 \end{example}


 \begin{definition}\rm
 Every sequence $\bphi_1,\bphi_2,\ldots,\bphi_n$
spans {\em some} vector space,
namely the space
 $$
   \Span(\bphi_1,\bphi_2,\ldots,\bphi_n)=\Range(\bPhi)
 $$
which is called the \jdef{vector space spanned by}
the sequence
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$. Here
$\bphi_1,\bphi_2,\ldots,\bphi_n\in\VV$ where $\VV$ is
a vector space, and $\bPhi:\F^{n\times 1}\to \VV$
is the  linear map corresponding to this sequence.
Thus
a sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ of elements of $\VV$
spans $\VV$ if and only if
 $$
   \Span(\bphi_1,\bphi_2,\ldots,\bphi_n)=\VV.
 $$
 \end{definition}



 \begin{remark}\rm
   Let $\VV$ be a vector space and $\WW$ be a subspace
of $\VV$: $\WW\subseteq\VV$. Let $\bphi_1,\bphi_2,\ldots,\bphi_n$
be elements of $\VV$. Then the following are equivalent:
 \begin{description}
   \item[(1)] $\bphi_j\in\WW$ for $j=1,2,\ldots,n$;
   \item[(2)] $\Span(\bphi_1,\bphi_2,\ldots,\bphi_n)\subseteq\WW$.
 \end{description}
 \end{remark}

 \begin{exercise}\rm\Amark\
Prove this.
\ifanswer%
 Assume~(1). Choose $\vv\in\Span(\bphi_1,\bphi_2,\ldots,\bphi_n)$.
Then $\vv=x_1\bphi_1+x_2\bphi_2+\cdots+x_n\bphi_n$. Hence $\vv\in\WW$
since the $\bphi_j$ are elements of the subspace $\WW$ and a subspace
is closed under the operations of addition and scalar multiplication.
This proves~(2).
\par
Assume~(2). Then~(1) holds since
$\bphi_j\in\Span(\bphi_1,\bphi_2,\ldots,\bphi_n)$:
$\bphi_j=x_1\bphi_1+x_2\bphi_2+\cdots+x_n\bphi_n$
if $x_j=1$ and $x_i=0$ for $i\ne j$.
\fi
 \end{exercise}

\section{Basis and Frame}\label{vspace-frame}

\begin{definition}\rm
   A \jdef{basis} for the vector space $\VV$ is a sequence
   % $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
 of vectors in $\VV$ which  is both  independent and spans $\VV$.
Recall (see Definition~\ref{def:frame}) that a \jdef{frame}
for the vector space $\VV$ is an isomorphism
 $$
   \bPhi:\F^{n\times 1}\to \VV.
 $$
 \end{definition}

 \begin{theorem}[Frame and Basis]
\label{basis-frame}
The sequence $(\bphi_1,\ldots,\bphi_n)$  of vectors in $\VV$ is
a basis for $\VV$ if and only if the corresponding linear map
$$
\bPhi:\F^{n\times 1}\to \VV
$$
is a frame.
 \end{theorem}

\Proof{}  The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is
a basis iff it is  independent and spans $\VV$.
By Theorem~\ref{independent=one-one}
the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
is  independent iff the map $\bPhi$ is one-one.
By Theorem~\ref{span-onto}
the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
spans $\VV$ iff the map $\bPhi$ is onto.
According to the definition of {\it isomorphism}, the map $\bPhi$
is a frame iff it is invertible, that is, iff it is both one-one and onto.
\QED


 One should think of the vector space  $\VV$ as a
 ``geometric space'' and of the basis
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$ as a vehicle for
introducing co-ordinates in $\VV$.
The correspondence
$\bPhi$ between the ``numerical space'' $\F^{n\times 1}$
and the geometric space $\VV$ constitutes a
co-ordinate system on $\VV$.
This means that the entries of the column
$$
   X=\Mat{c}
      x_1\\ x_2 \\ \vdots \\ x_n
     \Rix
$$
should be viewed as the ``co-ordinates'' of the vector
$$
    \vv = x_1\bphi_1+x_2\bphi_2+\ldots+x_n\bphi_n=\bPhi(X).
$$
When $\vv=\bPhi(X)$ we say that the matrix $X$
\jdef{represents} the vector $\vv$ in the frame $\bPhi$.


In any particular problem we try to
choose the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
(that is, the frame $\bPhi$)
so that the numerical description of the problem is as simple
as possible. The notation just introduced can (if used systematically)
be of great help in clarifying our thinking.


\iffalse
 \begin{table}\centering
  \begin{quote}\rm
    Suppose we are presented with a vector space $\VV$ and
a sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ of vectors.
To show that the sequence is a basis
for $\VV$ we must show
 \begin{description}
   \item[(1)] that $\bphi_j\in\VV$ for $j=1,2,\ldots,n$
    (so that there is a corresponding  map $\bPhi:\F^{n\times 1}\to\VV$),
  \item[(2)] that the sequence is independent
      (so that $\bPhi$ is one-one), and
  \item[(3)] that the sequence spans $\VV$
      (so that $\bPhi$ is onto).
 \end{description}
  \end{quote}
 \end{table}
\fi

\section{Examples and Exercises}
\begin{definition}\rm
The columns of the identity matrix
$$
   \IC{n}{1}= \col_1(I_n),\;
   \IC{n}{2}= \col_2(I_n),\; \ldots,\;
   \IC{n}{n}= \col_n(I_n)
$$
form a basis for $\F^{n\times 1}$ called the
\jdef{standard basis {\rm for $\F^{n\times 1}$}}.
\end{definition}


\noindent The standard basis for $\F^{3\times 1}$ is
$$
      \Mat{c} 1\\0\\0 \Rix,
      \Mat{c} 0\\1\\0 \Rix,
      \Mat{c} 0\\0\\1 \Rix.
$$
Note the obvious equation
$$
     \Mat{c} x_1\\x_2\\x_3 \Rix =
      x_1\Mat{c} 1\\0\\0 \Rix+
      x_2\Mat{c} 0\\1\\0 \Rix+
      x_3\Mat{c} 0\\0\\1 \Rix.
$$
This equation shows that every $X\in \F^{3\times 1}$ has
a unique expression as a linear combination  of the
vectors $\IC{3}{j}$; the coefficients
$x_1,x_2,x_3$ are precisely the entries in the column matrix $X$.
Thus $(\IC{3}{1},\IC{3}{2},\IC{3}{3})$ is a basis for
$\F^{3\times 1}$ as claimed. (The same argument works for arbitrary
$n$ to show that the standard basis is a basis.)


 \begin{question}\rm\Amark\
What is the frame corresponding to the standard basis?
 (Answer: The identity map of $\F^{n\times 1}$.)
 \end{question}

\iffalse
 \begin{quote}
   Do not make the error of thinking that the standard basis
   for $\F^{n\times 1}$ is the only basis. The following proposition
   describes the others.
 \end{quote}
\fi

\begin{proposition}
Let $B_1,B_2,\ldots,B_n \in \F^{n\times 1}$ and
let $B\in\F^{n\times n}$ be the matrix having these
as columns:
 $$
   B=\Mat{llcl}B_1 & B_2 & \cdots & B_n\Rix.
 $$
Then the sequence $(B_1,B_2,\ldots,B_n)$
is a basis for $\F^{n\times 1}$  if and only if
the matrix $B$ is invertible.
The frame corresponding to this basis is the isomorphism $\Bb$,
the matrix map determined by $B$.
\end{proposition}


\Proof{} We have
 $$
    \Bb(X)=BX= x_1B_1+x_2B_2+\cdots+x_nB_n
 $$
where $x_j=\entry_j(X)$.
Hence (in this special case) the map $\Bb$ goes by two names:
it is the  {\it map corresponding to the sequence} $(B_1,B_2,\ldots,B_n)$,
and
it is the {\it matrix map determined by the matrix} $B$.
The map $\Bb$ is an isomorphism iff
the matrix $B$ is invertible. By  Theorem~\ref{basis-frame},
the sequence is a basis iff the corresponding map $\Bb$ is an isomorphism.
\QED

\begin{exercise}\rm\Amark\
The vectors
$$
 B_1= \Mat{c} 2\\1 \Rix,\;\;
 B_2= \Mat{c} 1\\1 \Rix
$$
form a basis for $\F^{2\times 1}$ since the matrix
$
    B=\Mat{cc}
              2&1\\
              1&1\\
        \Rix
$
is invertible.
Find the unique numbers $x_1,x_2$ such that
$$
         \Mat{c} 1\\9 \Rix=
     x_1 \Mat{c} 2\\1 \Rix+
     x_2 \Mat{c} 1\\1 \Rix
$$
\ifanswer $
        \Mat{c} 1\\9   \Rix=
     -8 \Mat{c} 2\\1 \Rix+
     17 \Mat{c} 1\\1 \Rix
$
\fi
\end{exercise}

 \begin{example}\rm The set $\{\0\}$  consisting of the
single element $\0\in\VV$ is a subspace of the vector space
$\VV$. It is called the \jdef{zero subspace}.
By convention the \jdef{empty sequence} $()$
is a basis for the zero vector space.
 \end{example}


 \begin{example}\rm
Suppose that the numbers $a,b,c$ are not all zero.
Let
$\VV$ be the set of all
$\Mat{l}x \\ y \\ z\Rix\in\F^{3\times 1}$
such that $ax+by+cz=0$.
Geometrically, $\VV$ is a plane through the origin. If $c\ne 0$,
a basis is given by
 $$
   \bphi_1 = \Mat{r} c \\ 0 \\ -a \Rix,\;\;
   \bphi_2 = \Mat{r} 0 \\ c \\ -b \Rix.
 $$
To prove this we must show three things:
(1)~that $\bphi_1,\bphi_2\in\VV$,
(2)~that the sequence $(\bphi_1,\bphi_2)$ is independent,  and
(3)~that the sequence $(\bphi_1,\bphi_2)$ spans $\VV$. Part~(1)
follows from the calculations
 $$
   a(c)+b(0)+c(-a)=0,\;\;\; a(0)+b(c)+c(-b)=0.
 $$
Part~(2) follows from the equation
 $$
   x_1\bphi_1+x_2\bphi_2 =
\Mat{r}cx_1 \\ cx_2 \\ -ax_1-bx_2\Rix
 $$
so that (as $c\ne 0$) the equation $x_1\bphi_1+x_2\bphi_2 =0$
implies $x_1=x_2=0$. Part~(3) follows from the observation
that if $ax+by+cz=0$, then
 $$
   \Mat{c} x \\ y \\ z \Rix=
     \frac{x}{c}\bphi_1+\frac{y}{c}\bphi_2.
 $$
 \end{example}



\begin{example}\rm Let
 $$
   R=\Mat{rrrrr}
          1 & 0 & c_{13} & c_{14} & c_{15} \\
          0 & 1 & c_{23} & c_{24} & c_{25} \\
          0 & 0 & 0      & 0      & 0
        \Rix.
 $$
A basis for the null space of the matrix map determined by $R$
is $(\bphi_1,\bphi_2,\bphi_3)$ where
 $$
   \bphi_1=\Mat{c}
           -c_{13}  \\  -c_{23}  \\  1 \\  0  \\   0
         \Rix,\;\;\;
   \bphi_2=\Mat{c}
            -c_{14}  \\ -c_{24}  \\ 0 \\ 1 \\ 0
       \Rix,\;\;\;
   \bphi_3=\Mat{c}
            -c_{15} \\ -c_{25} \\ 0 \\ 0 \\ 1
       \Rix.
 $$
 \end{example}

\begin{example}\rm
Let
$$
 R =
    \Mat{ccccccc}
      1 & c_{11} & 0 &  c_{12} & 0 & c_{13} & c_{14} \\
      0 & c_{21} & 1 &  c_{22} & 0 & c_{23} & c_{24} \\
      0 & c_{31} & 0 &  c_{32} & 1 & c_{33} & c_{34} \\
      0 & 0      & 0 &  0      & 0 & 0      & 0      \\
      0 & 0      & 0 &  0      & 0 & 0      & 0
    \Rix.
$$
A basis $(\bphi_1,\bphi_2,\bphi_3,\bphi_4)$ for
the null space of the matrix map determined by $R$ is
 $$
   \bphi_1=\Mat{cccc}
       -c_{11} \\  1 \\ -c_{21} \\ 0 \\ -c_{31} \\ 0 \\ 0
           \Rix,\;\;\;
   \bphi_2 =\Mat{cccc}
       -c_{12} \\  0 \\ -c_{22} \\ 1 \\ -c_{32} \\ 0 \\ 0
             \Rix,\;\;\;
   \bphi_3=\Mat{cccc}
        -c_{13} \\ 0 \\ -c_{23} \\ 0 \\ -c_{33} \\ 1 \\ 0
              \Rix,\;\;\;
   \bphi_4=\Mat{cccc}
        -c_{14} \\ 0 \\ -c_{24} \\ 0 \\ -c_{34} \\ 0 \\ 1
           \Rix.
 $$
\end{example}


 \begin{example}\rm
  Recall
that $\Poly_n(\F)$ is the space of
all polynomials of degree $\le n$.  This is the space
of all functions $f:\F\to \F$ of form
 $$
   f(\xy)=a_0+a_1\xy+a_2\xy^2+\cdots+a_n\xy^n
 $$
for $\xy\in\F$. Here the coefficients $a_0,a_1,a_2,\ldots,a_n$
are chosen from $\F$. A frame
 $$
   \bPhi:\F^{(n+1)\times 1}\to \Poly_n(\F)
 $$
is given by $\bPhi(X)=f$ where
 $$
   X=\Mat{c} a_0\\a_1\\a_2\\ \vdots\\a_n \Rix
 $$
where the coefficients $a_0,a_1,a_2,\ldots,a_n$ of $f\in\Poly_n(\F)$ are
the entries of $X\in\F^{(n+1)\times 1}$. In other words, the
polynomials
$$
   \bphi_k(\xy)=\xy^k \mbox{ for $k=0,1,2,\ldots,n$}
$$
form a basis for $\Poly_n(\F)$.
 \end{example}


 \begin{exercise}\rm
Verify the formula
 $$
   a_k = \frac{f^{(k)}(0)}{k!}
 $$
for a polynomial $f\in\Poly_n(\F)$ and $k=0,1,2,\ldots,n$.
Here the numerator $f^{(k)}(0)$ is the
$k$-th derivative of $f=f(\xy)$ with respect to $\xy$ evaluated at
$\xy=0$. (This formula proves that the frame $\bPhi$ is one-one.)
 \end{exercise}


 \begin{example}\rm
  Recall
that $\Sin_n(\F)$ is the space of all functions
$f:\R\to \F$ of form
 $$
   f(\xy) = b_1\sin(\xy)+b_2\sin(2\xy) +\cdots + b_n\sin(n\xy)
 $$
for $\xy\in\R$. Here the coefficients $b_1,b_2,\ldots, b_n$ are
arbitrary elements of $\F$. The $n$ functions
$$
   \bphi_k(\xy)=\sin(k\xy) \mbox{ for $k=1,2,\ldots,n$}
$$
span $\Sin_n(\F)$ by  definition.
The corresponding map
$$
\bPhi:\F^{n\times 1}\to \Sin_n(\F)
$$
is given by $\bPhi(X)=f$ where
 $$
    X=\Mat{c}b_1\\b_2\\ \vdots \\ b_n\Rix
 $$
is the column of coefficients. The map $\bPhi$ is onto because
the sequence $(\bphi_1,\ldots,\bphi_n)$ spans $\Sin_n(\F)$.
The following
exercise shows that it is one-one and hence a frame.
 \end{example}

 \begin{exercise}\rm
Show that for $f\in\Sin_n(\F)$ and $k=1,2,\ldots,n$ we have
 $$
    b_k = \frac{2}{\pi} \int_0^\pi f(\xy)\sin(k\xy)\;d\xy.
 $$
(Hint: Show
 $$
     \int_0^\pi \sin(m\xy)\sin(k\xy)\;d\xy = 0
 $$
if $k\ne m$.)
 \end{exercise}

 \begin{example}\rm
  Recall
that  $\Cos_n(\F)$ is the space of all functions
$f:\R\to \F$ of form
 $$
   f(\xy) = a_0+ a_1\cos(\xy)+a_2\cos(2\xy) +\cdots + a_n\cos(n\xy)
 $$
for $\xy\in\R$. Here the coefficients $a_0,a_1,a_2,\ldots, a_n$ are
arbitrary elements of $\F$. The $n+1$ functions
$$
   \bphi_k(\xy)=\cos(k\xy) \mbox{ for $k=0,1,2,\ldots,n$}
$$
span $\Cos_n(\F)$ by definition.
The corresponding map
$\bPhi:\F^{(n+1)\times 1}\to \Cos_n(\F)$
is given by $\bPhi(X)=f$ where
 $$
    X=\Mat{c}a_0\\a_1\\a_2\\ \vdots \\ a_n\Rix
 $$
is the column of coefficients. The map $\bPhi$ is onto because
the sequence $(\bphi_0,\ldots,\bphi_n)$ spans $\Cos_n(\F)$.
The following
exercise shows that it is one-one and hence a frame.
 \end{example}

 \begin{exercise}\rm
    Express each of the coefficients $a_k$
($k=0,1,2,\ldots,n$) of $\cos(k\xy)$ in $f\in\Cos_n(\F)$ in terms
of an integral involving $f$, thus verifying that the correspondence
$\bPhi$ is one-one.
 \end{exercise}

 \begin{example}\rm
Recall
that $\Trig_n(\F)$ is the space of all functions
$f:\R\to \F$ of form
 $$
   f(\xy) = a_0+\sum_{k=1}^n a_k\cos(k\xy)+b_k\sin(k\xy)
 $$
for $t\in\R$. Here the coefficients
$b_n,\ldots,b_2,b_1,a_0,a_1,a_2,\ldots, a_n$ are
arbitrary elements of $\F$.
The $2n+1$ functions
$$
   \bphi_{-k}(\xy)=\sin(k\xy) \mbox{ for $k=1,2,\ldots,n$}
$$
 $$
   \bphi_k(\xy)=\cos(k\xy) \mbox{ for $k=0,1,2,\ldots,n$}
 $$
span $\Trig_n(\F)$ by definition. The map $\bPhi$ is onto since
the sequence $(\bphi_{-n},\ldots,\bphi_n)$ spans $\Trig_n(\F)$. The following
exercise shows that it is one-one and hence a frame.
 \end{example}

 \begin{exercise}\rm
    Express each of
the coefficients $a_k$ ($k=0,1,2,\ldots,n$) of $\cos(k\xy)$
and each of the  coefficients $b_k$  ($k=1,2,\ldots,n$) of $\sin(k\xy)$
of $f\in\Trig_n(\F)$ in terms of an integral involving $f$,
thus verifying that the correspondence $\bPhi$ is one-one.
You will need to verify the following identities:
 $$
   \int_{-\pi}^\pi \cos(m\xy)\sin(k\xy)\,d\xy=0
\mbox{ for all integers $m,k$}
 $$
 $$
   \int_{-\pi}^\pi \cos(m\xy)\cos(k\xy)\,d\xy=0
\mbox{ for all nonnegative integers $m\ne k$}
 $$
 $$
   \int_{-\pi}^\pi \sin(m\xy)\sin(k\xy)\,d\xy=0
\mbox{ for all nonnegative integers $m\ne k$}
 $$
 \end{exercise}

 \begin{definition}\rm
   The basis constructed in each of the preceding examples
 is called the \jdef{standard basis} for the corresponding
 vector space and the corresponding frame is called the
 \jdef{standard frame}. \rm For example, the standard basis
 for $\Poly_2(\F)$ is the sequence $(\bphi_0,\bphi_1,\bphi_2)$
 given by $\bphi_j(\xy)=\xy^j$. Note the discrepancy
between the subscript and the place in the sequence:
the second element of the sequence is $\bphi_1$ (not $\bphi_2$).
 \end{definition}

\section{Cardinality}
In the next section we shall define the {\em dimension}
of a vector space $\VV$.
It is the analog of the {\em cardinality} of a finite set.
A set $X$ is \jdef{finite} iff for some $n$
there is an invertible
map $\phi: \{1,2,\ldots,n\}\to X$; the number $n$ is
therefore the cardinality of the set $X$. The number
$n$ is called the \jdef{cardinality} of the finite set $X$;
it is the number of elements in the set $X$.
For an invertible map
 $$
   f:\{1,2,\ldots,n\}\to \{1,2,\ldots,m\}
$$
we have that $m=n$.
{\em
If $\phi: \{1,2,\ldots,n\}\to X$ and $\psi: \{1,2,\ldots,m\}\to X$
are both invertible, then
$\psi^{-1}\circ\phi: \{1,2,\ldots,n\}\to\{1,2,\ldots,m\}$
is also invertible, so $m=n$.} This little argument shows that
the cardinality of the set $X$ as defined above is legally defined,
that is, that the number $n$ is independent of the choice of $\phi$.
The definition of {\em dimension} of a vector space given in the next
section proceeds in an analogous fashion.



\section{The Dimension Theorem}
Just as the cardinality
of a finite set is the number of its elements,
so the dimension of a vector space is the length of
a  basis for that vector space. To be sure that this is
a legal definition we need the

 \begin{theorem}[Dimension Theorem]\label{dim-thm}
 Let $(\bpsi_1,\ldots,\bpsi_m)$  be a basis for the
 vector space $\VV$ and  $(\bphi_1,\bphi_2,\ldots,\bphi_n)$  be a
 sequence of vectors from $\VV$. Then
  \begin{description}
    \item[(1)] If $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is independent,
              then $n\le m$.
    \item[(2)] If $(\bphi_1,\bphi_2,\ldots,\bphi_n)$  spans $\VV$,
              then $m\le n$.
    \item[(3)] If $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is a basis for $\VV$,
              then $m=n$.
  \end{description}
 \end{theorem}

\Proof{} Let $\bPhi:\F^{n\times 1}\to \VV$
correspond to $(\bphi_1,\ldots,\bphi_n)$
and $\bPsi:\F^{m\times 1}\to \VV$ correspond to $(\bpsi_1,\ldots,\bpsi_m)$.
Then $\bPsi$ is a linear isomorphism so
we may form the composition
$$
\Aa=\bPsi^{-1}\circ\bPhi:\F^{n\times 1}\to \F^{m\times 1}.
$$
By  Theorem~\ref{MM-I}
the linear map $\Aa$ determines a matrix
$A\in\F^{m\times n}$ satisfying
 $$
   \Aa(X)=AX
 $$
for $X\in\F^{n\times 1}$.  Now
 \begin{description}
   \item[(1)] $\Aa$ is one-one iff $\bPhi$ is,
   \item[(2)] $\Aa$ is onto iff $\bPhi$ is, and
   \item[(3)] $\Aa$ is invertible iff $\bPhi$ is,
 \end{description}
so the result follows
from Theorem~\ref{PRQ-dim-thm}.
\QED

Part~(3) of the Dimension Theorem says that any two bases
for a vector space $\VV$ have the same number of elements.
This justifies the following

 \begin{definition}\rm
A vector space $\VV$ is \jdef{finite dimensional} iff it
has a basis $(\bpsi_1,\bpsi_2,\ldots,\bpsi_m)$. The number
$m$ of vectors in a basis for $\VV$  is called the
\jdef{dimension} of $\VV$.
 \end{definition}

 \begin{example}\rm The dimension of $\F^{2\times 2}$ is $4$.
  A basis is given by
 $$
   \bphi_1=\Mat{rr}1&0\\ 0&0\Rix,\;\;
   \bphi_2=\Mat{rr}0&1\\ 0&0\Rix,\;\;
   \bphi_3=\Mat{rr}0&0\\ 1&0\Rix,\;\;
   \bphi_4=\Mat{rr}0&0\\ 0&1\Rix.
 $$
 \end{example}

 \begin{question}\rm\Amark\ What is the dimension
      of $\F^{n\times 1}$?
      of $\F^{p\times q}$?
      of $\Poly_n(\F)$?
      of $\Trig_n(\F)$?
 (Answer: $\dim(\F^{n\times 1})=n$,
         $\dim(\F^{p\times q})=pq$,
         $\dim(\Poly_n(\F))=n+1$,
         $\dim(\Trig_n(\F))=2n+1$.)
 \end{question}


\medskip


Parts~(1) and~(2) of the Dimension Theorem may be phrased
as follows:  Suppose that the vector space $\VV$ has dimension $m$.
Then any independent sequence of vectors from $\VV$ has length $\le m$
and any sequence which spans $\VV$ has length $\ge m$. Hence


 \begin{corollary}
    Suppose that $\VV$ has dimension $n$ and that
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is a sequence of vectors
from $\VV$. Then the following are equivalent:
 \begin{description}
   \item[(1)] The sequence is independent.
   \item[(2)] The sequence spans $\VV$.
   \item[(3)] The sequence is a basis for $\VV$.
 \end{description}
 \end{corollary}

 \begin{question}\rm\Amark\ Suppose that
$\bphi_1,\bphi_2\in\F^{1\times 3}$. Is it true
that the sequence $(\bphi_1,\bphi_2)$ is a basis
for $\F^{1\times 3}$ if and only if it is  independent?
 (Answer: No. In fact, a sequence of length $2$
can never be a basis for a vector space of dimension $3$
by the Dimension Theorem. It might however be independent,
for example, the first two elements of a basis.)
 \end{question}

\begin{remark}\rm
 For a vector space $\VV$ the following conditions
have the same meaning:
\begin{description}
 \item[(1)] $\VV$ has dimension $n$.
 \item[(2)] $\VV$ has a basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ of length $n$.
 \item[(3)] There is an isomorphism (frame)   $\bPhi:\F^{n\times 1}\to\VV$.
\end{description}
\end{remark}

\section{Isomorphism}

 \begin{theorem}%[Isomorphism Theorem]
\label{iso-thm}
If $\TT:\VV\to \WW$
is an isomorphism, and
if the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is a basis for $\VV$,
then  the sequence
$(\TT(\bphi_1),\TT(\bphi_2),\ldots,\TT(\bphi_n))$ is a basis
for $\WW$.
 \end{theorem}

\Proof{} In other words the composition $\TT\circ\bPhi$
of the isomorphism $\TT:\VV\to \WW$ with
the frame $\bPhi:\F^{n\times 1}\to \VV$
corresponding to the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
is a frame $\TT\circ\bPhi:\F^{n\times 1}\to \WW$.
\QED

 \begin{corollary}\label{isodim}
If two finite dimensional vector spaces are
   isomorphic, then they have the same dimension.
 \end{corollary}

\begin{question}\rm\Amark\
 Is the converse of this corollary true?
 (Answer: Yes. If $\VV$ and $\WW$ both have dimension $n$,
then they are each isomorphic to $\F^{n\times 1}$ and hence
to each other.)
\end{question}

\begin{example}\rm
The sequence of polynomials $(1,\xy,\xy^2,\ldots,\xy^n)$ forms
a basis for the $(n+1)$-dimensional vector space
$\Poly_n(\F)$ of polynomials of degree $\le n$.
Each number $a$ determines an isomorphism
$\TT$ from $\Poly_n(\F)$ to itself via the formula
 $$
   \TT(f)(\xy) = f(\xy-a);
 $$
the inverse isomorphism is defined by
 $$
   \TT^{-1}(g)(\xy)=g(\xy+a).
 $$
Hence the sequence of polynomials
$(1,\xy-a,(\xy-a)^2,\ldots,(\xy-a)^n)$ forms another basis
for $\Poly_n(\F)$. A polynomial $f$ may be expressed
in terms of this basis using \jdef{Taylor's formula}:
 $$
    f(\xy) = \sum_{k=0}^n \frac{f^{(k)}(a)}{k!} (\xy-a)^k
 $$
where $f^{(k)}(a)$ is the $k$-th derivative of
$f$ evaluated at $a$.
 \end{example}


\section{Extraction}

 \begin{lemma}
   Assume that the sequence $(\bphi_1,\ldots,\bphi_k,\bphi_{k+1})$ spans
   $\VV$ and that $\bphi_{k+1}$ is a linear combination
    of $(\bphi_1,\ldots,\bphi_k)$:
     $$
       \bphi_{k+1} = a_1\bphi_1+\cdots+a_k\bphi_k.
     $$
 Then the shorter sequence
   $(\bphi_1,\ldots,\bphi_k)$ also spans $\VV$.
 \end{lemma}


\Proof{}
Choose $\vv\in\VV$. Then there are constants
$b_1,\ldots,b_k,b_{k+1}$ such that
 $$
   \vv=b_1\bphi_1+b_2\bphi_2+\cdots+b_k\bphi_k+b_{k+1}\bphi_{k+1}
 $$
since $(\bphi_1,\ldots,\bphi_k,\bphi_{k+1})$ spans $\VV$.
Into this equation substitute the expression
for $\bphi_{k+1}$ to obtain
 $$
   \vv= (b_1+b_{k+1}a_1)\bphi_1+(b_2+b_{k+1}a_2)\bphi_2+
    \cdots+(b_k+b_{k+1}a_k)\bphi_k
 $$
showing that $\vv$ is a linear combination of $\bphi_1,\ldots,\bphi_k$.
Thus $(\bphi_1,\ldots,\bphi_k)$ spans $\VV$.
\QED


 \begin{theorem}[Extraction Theorem]\label{extract}
 Assume that the sequence
$$
(\bphi_1,\bphi_2,\ldots,\bphi_m)
$$
 spans a vector space $\VV$ of dimension $n$.
Then there is a subsequence
$$
  (\bphi_{i_1},\bphi_{i_2},\ldots,\bphi_{i_n})
$$
which is a basis for $\VV$.
 \end{theorem}


\Proof{}
The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_m)$ spans $\VV$.
If it is not a basis,
then there is a relation
 $$
   c_1\bphi_1+c_2\bphi_2+\cdots+c_m\bphi_m=0
 $$
where not all of the coefficients $c_1,c_2,\ldots,c_m$ are
zero. Suppose for example that $c_1\ne 0$. Then
we may express $\bphi_1$ as a linear combination of
$\bphi_2,\ldots,\bphi_m$:
 $$
   \bphi_1= -\frac{c_2}{c_1}\bphi_2 - \cdots -\frac{c_m}{c_1}\bphi_m
 $$
and so $(\bphi_2,\ldots,\bphi_m)$ also spans $\VV$.
Repeat this process until you get a sequence which is  independent.
\QED

 \begin{corollary}
Let $\TT:\VV\to\WW$ be a linear map and
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$ be a basis for $\VV$.
Then there is a subsequence
$(\bphi_{i_1},\bphi_{i_2},\ldots,\bphi_{i_r})$ such that
$(\TT(\bphi_{i_1}),\TT(\bphi_{i_2}),\ldots,\TT(\bphi_{i_r}))$
forms a basis for  $\Range(\TT)\subseteq\WW$.
 \end{corollary}

\Proof{} In order to apply Theorem~\ref{extract} we must prove
that
$$
\Range(\TT)=\Span(\TT(\bphi_1),\TT(\bphi_2),\ldots,\TT(\bphi_n)).
$$
This is seen as follows. Choose $\ww\in\Range(\TT)$.
Then $\ww=\TT(\vv)$ for some $\vv\in\VV$ by the definition of
the range. But then
$\vv=\sum_jc_j\bphi_j$ for some numbers $c_j$ since
$(\bphi_1,\ldots,\bphi_n)$ is a basis for $\VV$.
Then
$$
 \ww=\TT(\vv)=\TT\left(\sum_{j=1}^nc_j\bphi_j\right) =
 \sum_{j=1}^n c_j\TT(\bphi_j)\in
 \Span(\TT(\bphi_1),\TT(\bphi_2),\ldots,\TT(\bphi_n))
$$
as required. (The reverse inclusion holds since each $\TT(\bphi_j)$
lies in the subspace $\Range(\TT)$.)
\QED



 \begin{example}\rm
 The  first, third, and fourth columns of the matrix
 $$
   R=\Mat{lllll}
      1 & c_{11} & 0 & 0 & c_{12} \\
      0 & c_{21} & 1 & 0 & c_{22} \\
      0 & c_{31} & 0 & 1 & c_{32} \\
      0 & 0      & 0 & 0 & 0
     \Rix
 $$
form a basis for the range of the map
 $$
    \F^{5\times 1}\to\F^{4\times 1}: X\mapsto RX.
  $$
 \end{example}


\section{Extension}


\begin{lemma}
If the sequence
$(\bphi_1,\bphi_2,\ldots,\bphi_k)$ is
   independent  and
   $\bphi_{k+1}\notin\Span(\bphi_1,\bphi_2,\ldots,\bphi_k)$,
then the longer sequence $(\bphi_1,\bphi_2,\ldots,\bphi_k,\bphi_{k+1})$
is  independent.
\end{lemma}

\Proof{}  If the sequence
$(\bphi_1,\ldots,\bphi_{k+1})$ were not independent
there would be a non-trivial relation
 $$
   c_1\bphi_1+c_2\bphi_2+\cdots+c_k\bphi_k+c_{k+1}\bphi_{k+1}=0.
 $$
In this relation we must have $c_{k+1}\ne 0$, since
$(\bphi_1,\bphi_2,\ldots,\bphi_k)$ is  independent.
But then
 $$
   \bphi_{k+1} = -\frac{c_1}{c_{k+1}}\bphi_1
               -\frac{c_2}{c_{k+1}}\bphi_2 -\cdots
                -\frac{c_k}{c_{k+1}}\bphi_k,
 $$
contradicting the hypothesis that $\bphi_{k+1}$ is not
in $\Span(\bphi_1,\bphi_2,\ldots,\bphi_k)$.\QED


 \begin{theorem}[Extension Theorem]\label{extend}
  Let $\VV$ be a vector space of dimension $n$.
 Any  independent sequence
$$
(\bphi_1,\bphi_2,\ldots,\bphi_m)
$$
 of elements of $\VV$ may be extended to a basis
$$
(\bphi_1,\bphi_2,\ldots,\bphi_m,\bphi_{m+1},\bphi_{m+2},\ldots,\bphi_n)
$$
 for $\VV$.
 \end{theorem}




\Proof{}
The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_m)$ is  independent.
If it is not a basis  for $\VV$ then it must fail to span,
so there must be an element $\bphi_{m+1}\in\VV$ which
is not in the span of the sequence:
 $$
     \bphi_{m+1}\notin\Span(\bphi_1,\bphi_2,\ldots,\bphi_m).
 $$
We may append $\bphi_{m+1}$ to the sequence
and, by the lemma,  the result
$$
(\bphi_1,\bphi_2,\ldots,\bphi_m,\bphi_{m+1})
$$
is still  independent.
Repeat this process until you get a sequence which spans $\VV$.
The process must terminate within $n-m$ steps by the
Dimension Theorem. \QED

\section{One-sided Inverses}

A map between sets is one-one
if and only if it has a left inverse;
it is onto
if and only if it has a right inverse.
Analogs of these statements hold for linear maps
between finite dimensional vector spaces.
These analogs say more: namely that there exist {\em linear inverses}.
To prove this we need the following

 \begin{lemma}
  Let $(\bpsi_1,\bpsi_2,\ldots,\bpsi_m)$ be a basis for
  a vector space $\WW$ and let $\VV$ be another vector space.
Then for any sequence
 $(\vv_1,\vv_2,\ldots,\vv_m)$ of elements of $\VV$
there is a unique linear map $\SS:\WW\to\VV$
such that $\SS(\bpsi_i)=\vv_i$ for $i=1,2,\ldots,m$.
\end{lemma}

\Proof{}
To prove this simply choose $\ww\in\WW$ and write it as
a linear combination of the $\bpsi_i$:
 $$
   \ww = y_1\bpsi_1+y_2\bpsi_2+\cdots+y_m\bpsi_m
 $$
where $y_i\in\F$. If $\SS$ is linear and satisfies
$\SS(\bpsi_i)=\vv_i$ then applying $\SS$ to the equation for $\ww$
gives
$$
   \SS(\ww)= y_1\vv_1+y_2\vv_2+\cdots+y_m\vv_m.
$$
This shows the uniqueness of $\SS$. To show existence
use this formula to define $\SS$.
The definition is legal since the representation of $\ww$
is unique.
We leave it to the reader to show
that $\SS$ defined in this way  is linear. \QED

 \begin{remark}\rm This lemma is a generalization
of Theorem~\ref{vspace-ass}.
It may be restated  as follows:
{\it
 $$
   \LMAP(\WW,\VV)\to\VV^m:
\SS\mapsto (\SS(\bpsi_1),\SS(\bpsi_2),\ldots,\SS(\bpsi_m))
 $$
is a one-one onto correspondence.
Here $\LMAP(\WW,\VV)$ denotes the set of linear maps of $\WW$ to $\VV$,
and $\VV^m$ denotes the set of sequences of length $m$
of elements from the vector space $\VV$.}
 \end{remark}

 \begin{corollary}[Left Inverse Theorem]
   A linear map $\TT:\VV\to\WW$ between finite dimensional
  vector spaces is one-one if and only if it has a
  linear left inverse $\SS:\WW\to\VV$.
 \end{corollary}

  \Proof{} Assume that $\TT$ is one-one.
Let $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ be a basis
for $\VV$ and $\bPhi$ denote the corresponding frame.
Then $\TT\circ\bPhi$ is one-one, so the sequence
$(\TT(\bphi_1),\TT(\bphi_2),\ldots,\TT(\bphi_n))$
is linearly independent.
Extend to a basis
$$
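 (\TT(\bphi_1),\TT(\bphi_2),\ldots,\TT(\bphi_n),
  \bpsi_{n+1},\bpsi_{n+2},\ldots,\bpsi_m)
$$
 for $\WW$, where $m=\dim(\WW)$. Now let $\SS$ be any linear map satisfying
$\SS(\TT(\bphi_j))=\bphi_j$ for $j=1,2,\ldots,n$.
Such a map exists by the lemma, and $\SS\circ\TT$ is the identity map
of $\VV$ since it fixes each basis element $\bphi_j$.
(If $m>n$, then $\SS(\bpsi_i)$ can be anything:
there is more than one left inverse.)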
\QED

 \begin{corollary}[Right Inverse Theorem]
   A linear map $\TT:\VV\to\WW$ between finite dimensional
  vector spaces is onto if and only if it has a
  linear right inverse $\SS:\WW\to\VV$.
 \end{corollary}

\Proof{} Assume that $\TT$ is onto.
Let $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ be a basis
for $\VV$ and $\bPhi$ denote the corresponding frame.
Then $\TT\circ\bPhi$ is onto, so the sequence
$(\TT(\bphi_1),\TT(\bphi_2),\ldots,\TT(\bphi_n))$
spans $\WW$. Extract a basis
 $$
    (\TT(\bphi_{j_1}),\TT(\bphi_{j_2}),\ldots,\TT(\bphi_{j_m}))
 $$
for $\WW$. Then define $\SS$ by $\SS(\TT(\bphi_{j_i}))=\bphi_{j_i}$
for $i=1,2,\ldots,m$. \QED


\section{Independence and Span}
The notion of {\em linear independence} can be defined in
terms of the operation
$$
(\bphi_1,\bphi_2,\ldots,\bphi_n)\mapsto \Span(\bphi_1,\bphi_2,\ldots,\bphi_n)
$$
which assigns to a sequence
the space which it spans. This is the content of the next  proposition.


 \begin{proposition}
   The sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is
dependent if and only if some element $\bphi_j$ of the
sequence is in the space spanned by the remaining elements:
$$
   \bphi_j\in\Span(\bphi_1,\ldots,\bphi_{j-1},\bphi_{j+1},\ldots,\bphi_n).
$$
 \end{proposition}

 \begin{exercise}\rm\Amark\
 Prove this.
\ifanswer Assume
$$
   \bphi_j\in\Span(\bphi_1,\ldots,\bphi_{j-1},\bphi_{j+1},\ldots,\bphi_n).
\eqno(\spadesuit)
$$
Then there are numbers
$x_1,\ldots,x_{j-1},x_{j+1},\ldots,x_n$ such that
 $$
    \bphi_j = x_1\bphi_1+\cdots+x_{j-1}\bphi_{j-1}+
             x_{j+1}\bphi_{j+1}+\cdots+x_n\bphi_n.
 $$
If we define $x_j=-1$, then not all the numbers $x_1,\ldots,x_n$ are
zero (since $x_j=-1$) and
 $$
     x_1\bphi_1+\cdots+x_j\bphi_j+\cdots+x_n\bphi_n=0
 $$
so $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ are  dependent.
\par
  Conversely, assume that the sequence $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
  is  dependent.
Then there are numbers $x_1,x_2,\ldots,x_n$, not all zero, such that
 $$
   x_1\bphi_1+x_2\bphi_2+\cdots+x_n\bphi_n=0.
 $$
Suppose that $x_j\ne 0$. Then
 $$
    \bphi_j = -\frac{x_1}{x_j}\bphi_1-\cdots-\frac{x_{j-1}}{x_j}\bphi_{j-1}
             -\frac{x_{j+1}}{x_j}\bphi_{j+1}-\cdots-\frac{x_n}{x_j}\bphi_n
 $$
so $(\spadesuit)$ holds.
\fi
 \end{exercise}

 \begin{example}\rm
     Let
     $$
       \bphi_1=\Mat{r}1 \\ 4 \\ 7\Rix,\;\;\;
       \bphi_2=\Mat{r}2 \\ 5 \\ 8\Rix,\;\;\;
       \bphi_3=\Mat{r}3 \\ 6 \\ 9\Rix.
     $$
Then the sequence $(\bphi_1,\bphi_2,\bphi_3)$ is  dependent since
 $$
   \bphi_1-2\bphi_2+\bphi_3=0
 $$
and $\bphi_1\in\Span(\bphi_2,\bphi_3)$ since
 $$
    \bphi_1 = 2\bphi_2-\bphi_3.
 $$
 \end{example}

\section{Rank and Nullity}

\begin{definition}\rm
The \jdef{rank} of a linear map is the dimension of
its range.
The \jdef{nullity} of a linear map is the dimension of
its null space.
The rank (or nullity)  of a matrix is the rank (or nullity) of the
corresponding matrix map.
\end{definition}

\begin{theorem}[Rank Nullity Relation]\label{RNR} \sloppy
The rank
and nullity of a linear map
$$
\TT:\VV\to\WW
$$
are related by
$$
   \dim(\Range(\TT))+\dim(\NULLSP(\TT))=
  \dim(\VV).
$$
\end{theorem}

\Proof{} Extend a basis $(\bphi_1,\ldots,\bphi_k)$
for $\NULLSP(\TT)$ to a basis
$(\bphi_1,\ldots,\bphi_n)$ for $\VV$.
Then
$(\TT(\bphi_{k+1}),\ldots,\TT(\bphi_n))$
is a basis for $\Range(\TT)$. \QED



\section{Exercises} %Basis and Frame

 \begin{exercise}\rm
Let the column vectors $\bphi_1,\bphi_2,\bphi_3\in\F^{3\times 1}$ be defined
by
 $$
   \bphi_1=\Mat{r}1\\ 4\\ 7 \Rix,\;\;\;
   \bphi_2=\Mat{r}2\\ 5\\ 8 \Rix,\;\;\;
   \bphi_3=\Mat{r}3\\ 6\\ 9 \Rix
 $$
and let $\bPhi:\F^{3\times 1}\to \F^{3\times 1}$ be the
linear map corresponding to the sequence $\bphi_1,\bphi_2,\bphi_3$.
Find a matrix $A\in\F^{3\times 3}$ such that $\bPhi(X)=AX$ for
$X\in\F^{3\times 1}$.
 \end{exercise}


 \begin{exercise}\rm
Let the row vectors $\bphi_1,\bphi_2,\bphi_3\in\F^{1\times 3}$ be defined by
$$
   \bphi_1=\Mat{rrr}1& 4& 7 \Rix,
$$
$$
   \bphi_2=\Mat{rrr}2& 5& 8 \Rix,
$$
$$
   \bphi_3=\Mat{rrr}3& 6& 9 \Rix
$$
and let $\bPhi:\F^{3\times 1}\to \F^{1\times 3}$ be the
linear map corresponding to the sequence $(\bphi_1,\bphi_2,\bphi_3)$.
Find a matrix $A\in\F^{3\times 3}$ such that $\bPhi(X)=X\tr A$ for
$X\in\F^{3\times 1}$ where $X\tr$ is the transpose of $X$.
 \end{exercise}

 \begin{exercise}\rm\Amark\
Let
 $
   A=\Mat{rrr} 1&2&3\\ 4&5&6\\ 3&3&3\Rix.
 $
   Show that the columns of $A$
are  dependent by finding $x_1,x_2,x_3$, not all zero, such
that
 $$
   x_1\col_1(A)+x_2\col_2(A)+x_3\col_3(A) = 0.
 $$
\ifanswer $x_1=1$, $x_2=-2$, $x_3=1$.
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
Let $A$ be as in the previous problem.
Show that the rows of $A$
are  dependent by finding $x_1,x_2,x_3$, not all zero, such
that
 $$
   x_1\row_1(A)+x_2\row_2(A)+x_3\row_3(A) = 0.
 $$
\ifanswer $x_1=1$, $x_2=-1$, $x_3=1$.
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
 Are there numbers $x_1,x_2,x_3$ (not all zero) which simultaneously
solve both of the previous two problems?
\ifanswer No.
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
    Let $\bphi_1,\bphi_2,\bphi_3\in\Poly_2(\F)$  be given by
 \begin{eqnarray*}
   \bphi_1(\xy)&=& 1+2\xy+3\xy^2,\\
   \bphi_2(\xy)&=& 4+5\xy+6\xy^2,\\
   \bphi_3(\xy)&=& 3+3\xy+3\xy^2.
 \end{eqnarray*}
Show that $\bphi_1,\bphi_2,\bphi_3$ are  dependent.
Which of the previous problems is this most like?
\ifanswer%
 The system
$$
   x_1(1+2\xy+3\xy^2)+x_2(4+5\xy+6\xy^2)+x_3(3+3\xy+3\xy^2)=0
$$
and the system
$$
   x_1\Mat{rrr}1& 2& 3 \Rix+
   x_2\Mat{rrr}4& 5& 6 \Rix+
   x_3\Mat{rrr}3& 3& 3 \Rix = 0
$$
have the same solutions $x_1,x_2,x_3$.
\fi
 \end{exercise}

\begin{exercise}\rm\Amark\ Let $W_1,W_2\in\F^{2\times 1}$.
When is the sequence $(W_1,W_2)$ independent?
\ifanswer This question is somewhat open-ended. One answer might be
{\it the sequence is independent if and only if it is independent}.
This is true, but  cannot be expected to earn you many points on a test.
Another answer might be {\it the sequence is independent iff the only
solution of $x_1W_1+x_2W_2=0$ is $x_1=x_2=0$}. This answer shows
that at least you have memorized the definition. Here is a better answer.
Let $W_1,W_2\in\F^{2\times 1}$  be given by
$$
 W_1=\Mat{c} a\\ c \Rix,\;\;
 W_2=\Mat{c} b\\ d \Rix,
$$
and let $A\in\F^{2\times 2}$ be the matrix whose columns
are $W_1,W_2$:
$$
A=\Mat{cc} a & b\\ c& d \Rix.
$$
Then the system $x_1W_1+x_2W_2=0$ may be written
in matrix form as $AX=0$ where $x_j=\entry_j(X)$.
By definition $(W_1,W_2)$ are independent iff
there is no non-zero solution $X$ of $AX=0$ and  this is so exactly
when $A$ is invertible. This is so exactly when the determinant $\det(A)=ad-bc$
is non-zero.
\fi
\end{exercise}

\begin{exercise}\rm\Amark\ When does $\Span(W_1,W_2)=\F^{2\times 1}$?
\ifanswer With $W_1,W_2,A,X$ as in the last solution,
the equation $V=x_1W_1+x_2W_2$ is equivalent to the
inhomogeneous  system $Y=AX$ where $Y=V$.
If the determinant $ad-bc$ is not zero, then $A$ is invertible
and there is, {\em for any choice of} $y_1,y_2$,
a unique solution $x_1,x_2$, namely $X=A^{-1}Y$:
$$
  x_1=\frac{y_1d-y_2b}{ad-bc},\;\;
  x_2=\frac{ay_2-cy_1}{ad-bc}.
$$
In this case $\Span(W_1,W_2)=\F^{2\times 1}$.
If the determinant $ad-bc$ is zero, then one of the equations
is a multiple of the other and we can find $y_1,y_2$ so that
there is no solution $x_1,x_2$. For example,
if $(a,b)=(mc,md)$, then there will be a solution $x_1,x_2$
only if $y_1=my_2$. Thus if $ad-bc=0$, then
$\Span(W_1,W_2)\ne \F^{2\times 1}$.
\fi
\end{exercise}

\begin{exercise}\rm\Amark\ When does $\Span(W_1,W_2,W_3)=\F^{2\times 1}$?
\ifanswer%
Let $W_1,W_2,W_3\in\F^{2\times 1}$  be given by
$$
 W_1=\Mat{c} a_1\\ b_1 \Rix,\;\;
 W_2=\Mat{c} a_2\\ b_2 \Rix,\;\;
 W_3=\Mat{c} a_3\\ b_3 \Rix.
$$
Then $\Span(W_1,W_2,W_3)=\F^{2\times 1}$ if and only if
at least one of the three determinants
$$
a_1b_2-a_2b_1,\;\;a_1b_3-a_3b_1,\;\;a_2b_3-a_3b_2,\;\;
$$
is not zero. This in turn is false iff one row of the matrix
$$
     A=\Mat{lll} a_1&a_2&a_3\\ b_1&b_2&b_3\Rix
$$
is a multiple of the other.
\fi
\end{exercise}

\begin{exercise}\rm\Amark\  Let $W_1,W_2,W_3\in\F^{2\times 1}$.
When is  $(W_1,W_2,W_3)$ independent?
\ifanswer Never. If $A\in\F^{2\times 3}$ there is always a non-zero
$X$ with $AX=0$. (Dimension Theorem)
\fi
\end{exercise}

 \begin{exercise}\rm
    Let $\bphi_1,\bphi_2,\bphi_3\in\Cos_2(\F)$  be given by
 \begin{eqnarray*}
   \bphi_1(\xy)&=& 1+2\cos(\xy)+3\cos(2\xy),\\
   \bphi_2(\xy)&=& 4+5\cos(\xy)+6\cos(2\xy),\\
   \bphi_3(\xy)&=& 3+3\cos(\xy)+3\cos(2\xy).
 \end{eqnarray*}
Show that $\bphi_1,\bphi_2,\bphi_3$ are  dependent.
Which of the previous problems is this most like?
 \end{exercise}

 \begin{exercise}\rm\Amark\
Let
 $
   A=\Mat{rrr} 1&2&3\\ 4&5&6\\ 3&3&3\Rix.
 $
   Show that the columns of $A$
do not span $\F^{3\times 1}$ by finding $Y\in\F^{3\times 1}$,
such that the inhomogeneous system
 $$
   Y= x_1\col_1(A)+x_2\col_2(A)+x_3\col_3(A)
 $$
has no solution $x_1,x_2,x_3$.
\ifanswer%
 Any column $Y$
which does not satisfy
$y_1-y_2+y_3=0$; for example,
$Y=\Mat{r}1 \\ 0\\ 0 \Rix$.
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
Let $A$ be as in the previous problem.
Show that the rows of $A$
do not span $\F^{1\times 3}$ by finding $K\in\F^{1\times 3}$,  such
that the inhomogeneous  system
 $$
   K= x_1\row_1(A)+x_2\row_2(A)+x_3\row_3(A)
 $$
has no solution $x_1,x_2,x_3$.
\ifanswer%
 Any row $K$ which does not satisfy
$k_1-2k_2+k_3=0$; for example,
$K=\Mat{rrr}1 & 0 & 0 \Rix$.
\fi
 \end{exercise}


 \begin{exercise}\rm
    Let $\bphi_1,\bphi_2,\bphi_3\in\Poly_2(\F)$  be given by
 \begin{eqnarray*}
   \bphi_1(\xy)&=& 1+2\xy+3\xy^2,\\
   \bphi_2(\xy)&=& 4+5\xy+6\xy^2,\\
   \bphi_3(\xy)&=& 7+8\xy+9\xy^2.
 \end{eqnarray*}
Show that $\bphi_1,\bphi_2,\bphi_3$ do not span $\Poly_2(\F)$
by exhibiting a polynomial
$$
     f(\xy)=a_0+a_1\xy+a_2\xy^2
$$
which can not be written in the form
$$
     f(\xy)=x_1\bphi_1(\xy)+x_2\bphi_2(\xy)+x_3\bphi_3(\xy).
$$
Which of the previous problems is this most like?
 \end{exercise}




 \begin{exercise}\rm
Verify that
 $$
  f(\xy) =
f(a) +f'(a)(\xy-a)+\frac{f''(a)}{2}(\xy-a)^2+\frac{f'''(a)}{6}(\xy-a)^3
 $$
for $f(\xy)=c_0+c_1\xy+c_2\xy^2+c_3\xy^3$.
 \end{exercise}



\begin{exercise}\rm\Amark\ Let $D\in\F^{m\times n}$ be of form:
 $$
   D=\Mat{ll}
          I_r                 & 0_{r\times (n-r)} \\
          0_{(m-r)\times r}   & 0_{(m-r)\times (n-r)}
      \Rix
 $$
where $I_r$ is the $r\times r$ identity matrix.
When are the columns of $D$ independent?
When do they span $\F^{m\times 1}$?
\ifanswer Then the columns of $D$ are independent iff $r=n$ and
the columns of $D$ span $\F^{m\times 1}$ iff $r=m$.
\fi
\end{exercise}


 \begin{exercise}\rm
 Let $R_j=\col_j(R)$ be the $j$-th column of the matrix
 $$
   R=\Mat{lllll}
      1 & c_{11} & 0 & 0 & c_{12} \\
      0 & c_{21} & 1 & 0 & c_{22} \\
      0 & c_{31} & 0 & 1 & c_{32} \\
      0 & 0      & 0 & 0 & 0
     \Rix
 $$
and $A=QR$ where $Q$ is invertible.
Let $A_j=\col_j(A)$ be the $j$-th
column of $A$.
Show that $(A_1,A_3,A_4)$ is a basis for
$\Span(A_1,A_2,A_3,A_4,A_5)$.

 \end{exercise}




 \begin{exercise}[Lagrange Interpolation]\rm \label{ex:Lagrange}
   Let $\lambda_0,\ldots,\lambda_n$ be distinct numbers
and $(\bphi_0,\ldots,\bphi_n)$ be the
sequence of polynomials
given by
 $$
  \bphi_k(\xy) = \frac{\prod_{j\ne k} (\xy-\lambda_j)}{\prod_{j\ne k} (\lambda_k-\lambda_j)}.
 $$
Show that this sequence is a basis for $\Poly_n(\F)$.
Given $b_0,b_1,b_2,\ldots,b_n$ there is a  unique polynomial
$f\in\Poly_n(\F)$ such that
 $$
   f(\lambda_j)=b_j,\;\; \mbox{ for $j=0,1,2,\ldots,n$.}
 $$
Express $f$ as a linear combination of $\bphi_0,\bphi_1,\ldots,\bphi_n$.
Hint: What is $\bphi_k(\lambda_i)$?
 \end{exercise}


 \begin{exercise}[Transitivity Lemma]\rm\Amark\
   Suppose $\VV$ is a vector space and that
$\bphi_1,\bphi_2,\ldots,\bphi_n$,
$\bpsi_1,\bpsi_2,\ldots,\bpsi_m$, and $\vv$
are elements of $\VV$.
Assume
$$
   \bpsi_i\in\Span(\bphi_1,\bphi_2,\ldots,\bphi_n)
$$
for $i=1,2,\ldots,m$ and
$$
   \vv\in\Span(\bpsi_1,\bpsi_2,\ldots,\bpsi_m)
$$
Show that
 $$
  \vv\in\Span (\bphi_1,\bphi_2,\ldots,\bphi_n).
 $$
\end{exercise}

 \begin{exercise}\rm\Amark\
Assume
 $$
   \bphi_{m+j}\in\Span(\bphi_1,\bphi_2,\ldots,\bphi_m)
 $$
for $j=1,2,\ldots,n-m$. Show that
 $$
   \Span(\bphi_1,\bphi_2,\ldots,\bphi_m)=\Span(\bphi_1,\bphi_2,\ldots,\bphi_n).
 $$
%(This is an abstract version of the
%Column Lemma on page~\pageref{col-lemma}.)
\ifanswer Since
\begin{eqnarray*}
\bphi_j\in\Span(\bphi_1,\ldots,\bphi_n)&&\mbox{ for $j=1,\ldots,m$, and}\\
\bphi_j\in\Span(\bphi_1,\ldots,\bphi_m)&&\mbox{ for $j=1,\ldots,n$,}
\end{eqnarray*}
the Transitivity Lemma (previous exercise) gives
\begin{eqnarray*}
\Span(\bphi_1,\ldots,\bphi_m)\subseteq\Span(\bphi_1,\ldots,\bphi_n),
&&\mbox{ and}\\
\Span(\bphi_1,\ldots,\bphi_n)\subseteq\Span(\bphi_1,\ldots,\bphi_m).&&
\end{eqnarray*}
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
For $j=1,2,3,4,5$, let $R_j=\col_j(R)$ be the $j$-th column
of the matrix
 $$
   R=\Mat{lllll}
       1 & c_{12} & 0 & c_{14} & c_{15} \\
       0 & c_{22} & 1 & c_{24} & c_{25} \\
       0 & 0      & 0 & 0      & 0
     \Rix.
 $$
Show that $\Span(R_1,R_3)= \Span(R_1,R_2,R_3,R_4,R_5)$.
\ifanswer
 $
   R_j = c_{1j}R_1 + c_{2j} R_3\in\Span(R_1,R_3)
 $
for $j=2,4,5$ so
  \begin{eqnarray*}
   \Span(R_1,R_3) &=& \Span(R_1,R_3,R_2,R_4,R_5)\\
                  &=& \Span(R_1,R_2,R_3,R_4,R_5)
   \end{eqnarray*}
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
Prove that if
$\sigma:\{1,2,\ldots,n\}\to \{1,2,\ldots,n\}$
is a permutation of $1,2,\ldots,n$, then
 $$
    \Span(\bphi_1,\bphi_2,\ldots,\bphi_n)=
    \Span(\bphi_{\sigma(1)},\bphi_{\sigma(2)},\ldots,\bphi_{\sigma(n)}).
 $$
\ifanswer $\sum_j x_j\bphi_j = \sum_k x_{\sigma(k)} \bphi_{\sigma(k)}$.
\fi
 \end{exercise}

 \begin{exercise}\rm\Amark\
  Let
   $$
      B_1 = \Mat{r}1 \\ 4 \\ 7 \Rix,\;\;\;
      B_2 = \Mat{r}2 \\ 5 \\ 8 \Rix.
    $$
  Extend the sequence $(B_1,B_2)$ to a basis
$(B_1,B_2,B_3)$ for $\F^{3\times 1}$.
\ifanswer Any $B_3$ for which the matrix
$B=\Mat{lll}B_1&B_2&B_3\Rix$
is invertible will do, for example
 $
   B_3=\Mat{l}0 \\ 0 \\ 1 \Rix.
 $
\fi
 \end{exercise}




\chapter{Matrix Representation}

A matrix $A\in\F^{m\times n}$ determines
a matrix map $\Aa:\F^{n\times 1}\to\F^{m\times 1}$
(see Theorem~\ref{MM-I}) and the isomorphism
$$
   \F^{m\times n}\to\LMAP(\F^{n\times 1},\F^{m\times 1}):A \mapsto\Aa
$$
(see Corollary~\ref{MM-II}) says that a matrix and a linear map
from $\F^{n\times 1}$ to $\F^{m\times 1}$ are essentially the same thing.
We have seen (Theorem~\ref{basis-frame}) that a frame
$\bPhi:\F^{n\times 1}\to\VV$
and a basis for the vector space $\VV$ are essentially the same thing
and that the map
$$
   \F^{m\times n}\to\LMAP(\VV,\WW):A \mapsto\bPsi\circ\Aa\circ\bPhi^{-1}
$$
determined by two frames $\bPhi$ and $\bPsi$ is an isomorphism.
In this chapter we see how this isomorphism relates
the  vector space theory to matrix theory.



\section{The Representation Theorem}\label{sec:MatRep}

Assume
 \begin{description}
   \item[(1)]
      $\VV$ is a finite dimensional vector space of dimension $n$.
   \item[(2)]
      $\WW$ is a finite dimensional vector space of dimension $m$.
   \item[(3)]
      $\IC{n}{j}=\col_j(I_n)$ is the $j$-th column of the $n\times n$
      identity matrix.
   \item[(4)]
      $\IC{m}{i}=\col_i(I_m)$ is the $i$-th column of the $m\times m$
      identity matrix.
   \item[(5)]
     $\bPhi:\F^{n\times 1}\to \VV$ is a frame for $\VV$.
  \item[(6)]
     $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is the basis corresponding to
     the frame $\bPhi$. Thus $\bphi_j=\bPhi(\IC{n}{j})$ for $j=1,2,\ldots,n$.
   \item[(7)]
     $\bPsi:\F^{m\times 1}\to \WW$ is a frame for $\WW$.
  \item[(8)]
     $(\bpsi_1,\bpsi_2,\ldots,\bpsi_m)$ is the basis corresponding to
     the frame $\bPsi$. Thus $\bpsi_i=\bPsi(\IC{m}{i})$ for $i=1,2,\ldots,m$.
 \end{description}


 \begin{proposition}[Representation Theorem]\label{REP-thm}
   Let  $\TT:\VV\to \WW$ be a linear map.
   The matrix $A$ representing the  map $\TT$
   in the frames $\bPhi$ and $\bPsi$ is characterized
   by the equations
 $$
    \TT(\bphi_j) = \sum_{i=1}^m a_{ij}\bpsi_i \eqno{(3)}
 $$
for $j=1,2,\ldots,n$. Here
$\bphi_j$ is the $j$-th element of
the basis corresponding to the frame $\bPhi$,
$\bpsi_i$ is the $i$-th element of
the basis corresponding to the frame $\bPsi$,
and $a_{ij}=\entry_{ij}(A)$.
 \end{proposition}

\Proof{}
The equation
$$
      A\IC{n}{j} = \sum_{i=1}^m a_{ij} \IC{m}{i} \eqno{(3')}
$$
is analogous to equation~(3); it says that $A\IC{n}{j}=\col_j(A)$.
Note also that
 $$
   \bphi_j=\bPhi(\IC{n}{j}),\;\;\;\bpsi_i=\bPsi(\IC{m}{i}).
 $$
The matrix $A$ is characterized by the equation
 $$
     \TT(\bPhi(X))=\bPsi(AX) \eqno{(4)}
 $$
for $X\in\F^{n\times 1}$.
(Equation~(4) is obtained by
rewriting equation~(1) as $\TT\circ\bPhi=\bPsi\circ\Aa$
and evaluating at $X$.)
Now take $X=\IC{n}{j}$ in equation~(4)
to obtain
 \begin{eqnarray*}
    \TT(\bphi_j) &=& \bPsi(A\IC{n}{j}) \\
                &=& \bPsi\left( \sum_{i=1}^m a_{ij}\IC{m}{i} \right) \\
                &=&   \sum_{i=1}^m a_{ij}\bPsi(\IC{m}{i}) \\
                &=&   \sum_{i=1}^m a_{ij}\bpsi_i
 \end{eqnarray*}
as required.
\QED


 \begin{remark}\rm
 When $\VV=\WW$ and $\bPsi=\bPhi$
the matrix $A$ representing
the map $\TT$ in the frame $\bPhi$ is characterized
by the equations
 $$
    \TT(\bphi_j) = \sum_{i=1}^n a_{ij}\bphi_i
 $$
for $j=1,2,\ldots,n$ where $a_{ij}=\entry_{ij}(A)$.
 \end{remark}


 \begin{example}\rm We take
  $$
     \VV=\Poly_3(\F),\;\; \WW=\F^{1\times 3},
  $$
define $\TT:\VV\to\WW$ by
 $$
 \TT(f) = \Mat{ccc}f(1) & f(-1) & f'(0) \Rix.
 $$
Let the  frame $\bPhi:\F^{4\times 1}\to\VV$ be the standard frame given by
 $$
    \bphi_1(t)=1,\;\;\bphi_2(t)=t,\;\;\bphi_3(t)=t^2,\;\; \bphi_4(t)=t^3,
  $$
and the frame $\bPsi:\F^{3\times 1}\to\F^{1\times 3}$ be defined by
$\bPsi(Y) = Y\tr$ so that
 \begin{eqnarray*}
     \bpsi_1 &=&\Mat{rrr}1 & 0 & 0\Rix\\
     \bpsi_2 &=&\Mat{rrr}0 & 1 & 0\Rix\\
     \bpsi_3 &=&\Mat{rrr}0 & 0 & 1\Rix
 \end{eqnarray*}
 We find the first column of $A$:
  \begin{eqnarray*}
  \TT(\bphi_1) &=&\Mat{rrr}
                   \bphi_1(1) & \bphi_1(-1) & \bphi_1'(0)
                  \Rix\\
              &=& \Mat{rrr}1 & 1 & 0\Rix\\
              &=& 1\bpsi_1+1\bpsi_2+0\bpsi_3.
  \end{eqnarray*}
 We find the second column of $A$:
  \begin{eqnarray*}
      \TT(\bphi_2) &=&\Mat{rrr}
                   \bphi_2(1) & \bphi_2(-1) & \bphi_2'(0)
                  \Rix\\
                  &=& \Mat{rrr}1 & -1 & 1\Rix\\
                  &=& 1\bpsi_1-1\bpsi_2+1\bpsi_3.
  \end{eqnarray*}
 We find the third column of $A$:
  \begin{eqnarray*}
      \TT(\bphi_3) &=&\Mat{rrr}
                   \bphi_3(1) & \bphi_3(-1) & \bphi_3'(0)
                  \Rix\\
                  &=& \Mat{rrr}1 & 1 & 0\Rix\\
                  &=& 1\bpsi_1+1\bpsi_2+0\bpsi_3.
  \end{eqnarray*}
 We find the fourth column of $A$:
  \begin{eqnarray*}
      \TT(\bphi_4) &=&\Mat{rrr}
                   \bphi_4(1) & \bphi_4(-1) & \bphi_4'(0)
                  \Rix\\
                  &=& \Mat{rrr}1 & -1 & 0\Rix\\
                  &=& 1\bpsi_1-1\bpsi_2+0\bpsi_3.
  \end{eqnarray*}
  Thus $A$ is given by
   $$
      A=\Mat{rrrr}
        1 &  1 & 1 &  1 \\
        1 & -1 & 1 & -1 \\
        0 &  1 & 0 &  0
      \Rix.
    $$
 \end{example}

   This example required very little calculation
   because of the simple nature of the frame $\bPsi$. In general
   we will have to solve an inhomogeneous linear system of
   $m$ equations in $m$ unknowns to find the $j$-th column of
   $A$. As we must solve such a system for each value of
   $j=1,2,\ldots,n$ this can lead to quite  a bit of work.
   The next example requires us to invert an $m\times m$
   matrix to find $A$. It still isn't too bad since we take
   $m=2$.

 \begin{example}\rm We take
  $$
     \VV=\Poly_3(\F),\;\; \WW=\F^{1\times 2},
  $$
define $\TT:\VV\to\WW$ by
 $$
    \TT(f) = \Mat{cc}f(1) & f(2) \Rix.
  $$
Let the  frame $\bPhi:\F^{4\times 1}\to\VV$ be the standard frame given by
 $$
    \bphi_1(t)=1,\;\;\bphi_2(t)=t,\;\;\bphi_3(t)=t^2,\;\; \bphi_4(t)=t^3,
  $$
and the frame $\bPsi:\F^{2\times 1}\to\F^{1\times 2}$ be defined by
 \begin{eqnarray*}
     \bpsi_1 &=&\Mat{rr}7 & 3\Rix\\
     \bpsi_2 &=&\Mat{rr}2 & 1\Rix
 \end{eqnarray*}
 We find the first column of $A$:
  \begin{eqnarray*}
  \TT(\bphi_1) &=&\Mat{rr}
                   \bphi_1(1) & \bphi_1(2)
                  \Rix\\
              &=& \Mat{rr} 1  & 1 \Rix\\
              &=& a_{11} \Mat{rr}7 & 3\Rix
                  +a_{21}\Mat{rr}2 & 1\Rix.
  \end{eqnarray*}
 This leads to the $2\times 2$ system
  \begin{eqnarray*}
           1 &=& 7a_{11}+2a_{21} \\
           1 &=& 3a_{11}+1a_{21}
  \end{eqnarray*}
  which has the solution $a_{11}=-1$, $a_{21}=4$.
  We repeat this for columns two, three, and four to obtain
   $$
      A=\Mat{rrrr}
        -1 &  -3 & -7 &  -15 \\
         4 &  11 & 25 &   53
      \Rix.
    $$
 \end{example}


 \section{The Transition Matrix}
Let $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ be a basis for a vector space
$\VV$ and $\bPhi:\F^{n\times 1}\to \VV$ be the corresponding frame.
Let $(\tilde{\bphi}_1,\tilde{\bphi}_2,\ldots,\tilde{\bphi}_n)$ be another
basis for $\VV$ with corresponding frame
$\widetilde{\bPhi}:\F^{n\times 1}\to \VV$. Then the
composition
 $$
   \widetilde{\bPhi}^{-1}\circ\bPhi: \F^{n\times 1}\to \F^{n\times 1}
 $$
is a linear isomorphism from $\F^{n\times 1}$ to itself and
is thus given by an invertible matrix $P$:
 $$
   \widetilde{\bPhi}^{-1}(\bPhi(X)) = PX
 $$
for $X\in\F^{n\times 1}$.

 \begin{definition}\rm\label{change-basis}
   This matrix $P$ is called the
\jdef{transition matrix}
from the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$  to the basis
$(\tilde{\bphi}_1,\tilde{\bphi}_2,\ldots,\tilde{\bphi}_n)$.
(One also calls $P$ the transition matrix from the frame
$\bPhi$ to the frame $\widetilde{\bPhi}$.)
 \end{definition}

 \begin{remark}\rm
 Note that  $P$ is the matrix representing the identity
transformation in the frames $\bPhi$ and $\widetilde{\bPhi}$,
but it is less confusing to have a separate name
in this context since it plays a different role.
\end{remark}


The equation defining $P$ may be written in the form
 $$
   \widetilde{\bPhi}(PX)= \bPhi(X).
 $$
If we plug in $X=\col_j(I_n)$, the $j$-th column of the identity
matrix we obtain
 $$
   \sum_{i=1}^n p_{ij}\tilde{\bphi}_i=\bphi_j
 $$
where $p_{ij}=\entry_{ij}(P)$ the $(i,j)$-entry of $P$. Thus
the matrix $P$ enables us to express the vectors $\bphi_j$
as a linear combination of the vectors $\tilde{\bphi}_i$,
$i=1,2,\ldots,n$. On the other hand suppose that
$\vv\in\VV$. Then $\vv=\bPhi(X)$ for some $X\in\F^{n\times 1}$
and $\vv=\widetilde{\bPhi}(\widetilde{X})$ for some $\widetilde{X}$:
 $$
   \vv=\sum_{i=1}^n x_i\bphi_i
       =\sum_{i=1}^n \tilde{x}_i\tilde{\bphi}_i.
 $$
Since $\bPhi(X)=\widetilde{\bPhi}(\widetilde{X})$ we have
$\widetilde{X}=PX$ so that $P$ transforms the column vector $X$
which represents $\vv$ in the frame $\bPhi$ to the column vector
$\widetilde{X}$ which represents the same vector $\vv$ in the frame
$\widetilde{\bPhi}$.


 \begin{example}\rm
   Here is a basis for $\Poly_2(\F)$:
 $$
    \bphi_1(\xy)=1,\;\;\bphi_2(\xy)=\xy,\;\;\bphi_3(\xy)=\xy^2,
 $$
and here is another basis:
 $$
   \tilde{\bphi}_1(\xy)=1,\;\;
   \tilde{\bphi}_2(\xy)=\xy+1,\;\;\tilde{\bphi}_3(\xy)=(\xy+1)^2.
 $$
We find the transition matrix $P$ from the first basis to the second.
 The columns of $P$ are given by
 $$
   \col_j(P) = \widetilde{\bPhi}^{-1}(\bPhi(\IC{n}{j}))
 $$
for $j=1,2,3$, where $\IC{n}{j}=\col_j(I_3)$ is the $j$-th
column of the identity matrix. We apply $\widetilde{\bPhi}$
to both sides and use the formula $\bPhi(\IC{n}{j})=\bphi_j$
to rewrite this in the form
 $$
   p_{1j}\tilde{\bphi}_1+p_{2j}\tilde{\bphi}_2+p_{3j}\tilde{\bphi}_3=\bphi_j
 $$
or
$$
   p_{1j}1+p_{2j}(t+1)+p_{3j}(t+1)^2= t^{j-1}
$$
where $p_{ij}=\entry_{ij}(P)$.
For each $j=1,2,3$ we must thus solve three equations
in three unknowns. By equating coefficients of $\xy^0$, $\xy^1$, $\xy^2$
we get
 $$
    \begin{array}{lll}
      p_{11} = 1, &  p_{12} =     -1, & p_{13} = \sig 1,\\
      p_{21} = 0, &  p_{22} = \sig 1, & p_{23} =     -2,\\
      p_{31} = 0, &  p_{32} = \sig 0, & p_{33} = \sig 1.
    \end{array}
 $$
 \end{example}

\begin{question}\rm\Amark\
Let $(\bphi_1,\bphi_2,\bphi_3)$ and
$(\tilde{\bphi}_1,\tilde{\bphi}_2,\tilde{\bphi}_3)$
be bases for a vector space $\VV$ and $P\in\F^{3\times 3}$
be the transition matrix from the former to the latter.
Suppose that a matrix $B\in\F^{3\times 3}$ is defined by
$\entry_{ij}(B)=b_{ij}$ where
\begin{eqnarray*}
     \tilde{\bphi}_1 &=& b_{11}\bphi_1+b_{12}\bphi_2+b_{13}\bphi_3\\
     \tilde{\bphi}_2 &=& b_{21}\bphi_1+b_{22}\bphi_2+b_{23}\bphi_3\\
     \tilde{\bphi}_3 &=& b_{31}\bphi_1+b_{32}\bphi_2+b_{33}\bphi_3.
\end{eqnarray*}
Which of the following is necessarily true?
\begin{description}
 \item[(1)] $B$ is $P$.
 \item[(2)] $B$ is the transpose of $P$.
 \item[(3)] $B$ is  $P^{-1}$.
 \item[(4)] $B$ is the transpose of $P^{-1}$.
\end{description}
 (Answer: (4).)
\end{question}


\section{Change of Frames}
Let
 $
   \TT:\VV\to \WW,
 $
 $
   \bPhi:\F^{n\times 1}\to \VV,
 $
 $
   \bPsi:\F^{m\times 1}\to \WW,
 $
be as in  Section~\ref{sec:MatRep} and
let $A\in\F^{m\times n}$ be the matrix representing
the map $\TT:\VV\to\WW$ in the frames $\bPhi$ and $\bPsi$,
and $\Aa:\F^{n\times 1}\to\F^{m\times 1}$ be the matrix
map corresponding to $A$.

 \begin{proposition} \label{REP-Bi}
Changing frames has the effect of replacing the matrix $A$ representing
$\TT$ by an equivalent matrix $\widetilde{A}$. More precisely,
for $\widetilde{A}\in\F^{m\times n}$ the following conditions are
equivalent:
 \begin{description}
   \item[(1)]
     There are frames $\widetilde{\bPhi}:\F^{n\times 1}\to \VV$
     and $\widetilde{\bPsi}:\F^{m\times 1}\to \WW$ so that
     $\widetilde{A}$ is the matrix representing $\TT$ in the frames
     $\widetilde{\bPhi}$ and $\widetilde{\bPsi}$.
   \item[(2)] The matrices $A$ and $\widetilde{A}$ are equivalent
     in the sense that there are invertible matrices $P\in\F^{n\times n}$
     and $Q\in\F^{m\times m}$ such that
      $$
         \widetilde{A}= QAP^{-1}.
      $$
 \end{description}
 \end{proposition}

 \Proof{}
  Assume~(1). Let $\widetilde{\Aa}$ be the matrix map
  corresponding to $\widetilde{A}$:
 $$
    \widetilde{\Aa}= \widetilde{\bPsi}^{-1}\circ\TT\circ\widetilde{\bPhi}.
 $$
Then
$$
   \widetilde{\bPsi}\circ\widetilde{\Aa}\circ\widetilde{\bPhi}^{-1}
= \TT =\bPsi\circ\Aa\circ \bPhi^{-1}
$$
so
 $$
   \widetilde{\Aa} = \Qq\circ\Aa\circ \Pp^{-1}\eqno{(5)}
 $$
where $\Qq:\F^{m\times 1}\to \F^{m\times 1}$
and $\Pp:\F^{n\times 1}\to \F^{n\times 1}$
are the transition matrices given by
 $$
   \Qq = \widetilde{\bPsi}^{-1}\circ\bPsi,\;\;
   \Pp = \widetilde{\bPhi}^{-1}\circ\bPhi.
 $$
Then $\Qq$ is a matrix map corresponding
to a matrix $Q\in\F^{m\times m}$
and $\Pp$ is a matrix map corresponding
to a matrix $P\in\F^{n\times n}$.
Equation~$(5)$ implies $\widetilde{A}= QAP^{-1}$.


Assume~(2). Define frames $\widetilde{\bPsi}$ and $\widetilde{\bPhi}$
by
 $$
   \widetilde{\bPsi}=\bPsi\circ\Qq^{-1},\;\;\;
   \widetilde{\bPhi}=\bPhi\circ\Pp^{-1}.
 $$
Then
 \begin{eqnarray*}
      \widetilde{\Aa}
&=& \Qq\circ\Aa\circ \Pp^{-1}\\
&=& (\widetilde{\bPsi}^{-1}\circ\bPsi)
       \circ\Aa\circ
     (\widetilde{\bPhi}^{-1}\circ\bPhi)^{-1}\\
&=& \widetilde{\bPsi}^{-1}\circ
      (\bPsi \circ\Aa\circ\bPhi^{-1})
     \circ\widetilde{\bPhi}\\
&=& \widetilde{\bPsi}^{-1}\circ \TT\circ \widetilde{\bPhi}
 \end{eqnarray*}
which proves~(1).
 \QED

 \begin{corollary}
   Changing the frame $\bPsi$ at the target has the effect of
 replacing the matrix $A$  representing $\TT$
by a left equivalent matrix $\widetilde{A}$. More precisely,
 for $\widetilde{A}\in\F^{m\times n}$ the following conditions are equivalent:
 \begin{description}
   \item[(1)]
     There is a frame
     $\widetilde{\bPsi}:\F^{m\times 1}\to \WW$ so that
     $\widetilde{A}$ is the matrix representing $\TT$ in the frames
     $\bPhi$ and $\widetilde{\bPsi}$.
   \item[(2)] The matrices $A$ and $\widetilde{A}$ are left equivalent
     in the sense that  there is an invertible matrix
     $Q\in\F^{m\times m}$ such that
      $
         \widetilde{A}= QA.
      $
 \end{description}
 \end{corollary}

\Proof{} Take $\bPhi=\widetilde{\bPhi}$ in  Proposition~\ref{REP-Bi}
 so that $P=I_n$ is the identity matrix. \QED

\begin{corollary}
Changing the frame $\bPhi$ % at the source
has the effect of
replacing the matrix $A$  representing $\TT$
by a right equivalent matrix $\widetilde{A}$.  More precisely,
 for $\widetilde{A}\in\F^{m\times n}$ the following conditions are equivalent:
 \begin{description}
   \item[(1)]
     There is a frame
     $\widetilde{\bPhi}:\F^{n\times 1}\to \VV$ so that
     $\widetilde{A}$ is the matrix representing $\TT$ in the frames
     $\widetilde{\bPhi}$ and $\bPsi$.
   \item[(2)] The matrices $A$ and $\widetilde{A}$ are right equivalent
     in the sense that     there is an invertible matrix
     $P\in\F^{n\times n}$ such that
      $
         \widetilde{A}= AP^{-1}.
      $
 \end{description}
 \end{corollary}

\Proof{} Take $\bPsi=\widetilde{\bPsi}$ in   Proposition~\ref{REP-Bi}
 so that $Q=I_m$ is the identity matrix. \QED





 \begin{corollary}[Similarity]\label{REP-Sim}
Now assume that $\VV=\WW$ so that $\TT:\VV\to\VV$ is a linear map
from a vector space to itself.
Let $\bPhi:\F^{n\times 1}\to\VV$ be a frame for $\VV$
and let $A$ be the matrix representing $\TT$ in the frame $\bPhi$.
Then changing frames has the effect of replacing the matrix representing
$\TT$ by a similar matrix.
More precisely,
for $\widetilde{A}\in\F^{n\times n}$ the
following conditions are equivalent:
 \begin{description}
   \item[(1)]
     There is a frame $\widetilde{\bPhi}:\F^{n\times 1}\to \VV$
     such that
     $\widetilde{A}$ is the matrix representing $\TT$ in the frame
     $\widetilde{\bPhi}$.
   \item[(2)] The matrices $A$ and $\widetilde{A}$  are \jdef{similar}, i.e.
     there is an invertible matrix $P\in\F^{n\times n}$
      such that
      $$
         \widetilde{A}= PAP^{-1}.
      $$
 \end{description}
 \end{corollary}

\Proof{} Take $\bPsi=\bPhi$ and
$\widetilde{\bPsi}=\widetilde{\bPhi}$ in Theorem~\ref{REP-Bi}
 so that $Q=P$. \QED



\pagebreak[4]

Diagrams can be useful for remembering formulas.
The formula $\widetilde{\bPhi}\circ\Pp=\bPhi$ which says that
$P$ is the transition matrix from $\bPhi$ to $\widetilde{\bPhi}$
can be represented by the triangle:

\medskip

{\centering
\setlength{\unitlength}{.1in}
 \begin{picture}(50,14)
  % vertices
        \put(25,11){\makebox(0,0){$\VV$}}
        \put(15,2){\makebox(0,0){$\F^{n\times 1}$}}
        \put(35,2){\makebox(0,0){$\F^{n\times 1}$}}
  % edges
        \put(17,2){\vector(1,0){16}}
        \put(17,4){\vector(1,1){5}}
        \put(33,4){\vector(-1,1){5}}
  % labels
        \put(25,4){\makebox(0,0){$\Pp$}}
        \put(19,8){\makebox(0,0){$\bPhi$}}
        \put(31,8){\makebox(0,0){$\widetilde{\bPhi}$}}
 \end{picture}
}


\medskip

\noindent The formula  $\bPsi\circ\Aa=\TT\circ\bPhi$ which says
that $A$ is the matrix representing $\TT$ in the frames
$\bPhi$ and $\bPsi$ can be represented by the rectangle:


\medskip

{\centering
\setlength{\unitlength}{.1in}
 \begin{picture}(50,18)
     % vertices
         \put(15,12){\makebox(0,0){$\VV$}}
         \put(35,12){\makebox(0,0){$\WW$}}
         \put(15,3){\makebox(0,0){$\F^{n\times 1}$}}
         \put(35,3){\makebox(0,0){$\F^{m\times 1}$}}
      % edges
         \put(17,12){\vector(1,0){15}}
         \put(15,5){\vector(0,1){5}}
         \put(17,3){\vector(1,0){15}}
         \put(35,5){\vector(0,1){5}}
      % labels
         \put(25,14){\makebox(0,0){$\TT$}}
         \put(25,5){\makebox(0,0){$\Aa$}}
         \put(13,8){\makebox(0,0){$\bPhi$}}
         \put(38,8){\makebox(0,0){$\bPsi$}}
 \end{picture}
}


\medskip

\noindent The Change of Frames Theorem is represented by the
following diagram:


\medskip



{\centering
\setlength{\unitlength}{.1in}
  \begin{picture}(50,20)
     % vertices
          \put(15,10){\makebox(0,0){$\VV$}}
          \put(35,10){\makebox(0,0){$\WW$}}
          \put(10,5){\makebox(0,0)[l]{$\F^{n\times 1}$}}
          \put(10,15){\makebox(0,0)[l]{$\F^{n\times 1}$}}
          \put(40,5){\makebox(0,0)[l]{$\F^{m\times 1}$}}
          \put(40,15){\makebox(0,0)[l]{$\F^{m\times 1}$}}
     % edges
           \put(17,10){\vector(1,0){15}}
           \put(14,5){\vector(1,0){25}}
           \put(14,15){\vector(1,0){25}}
           \put(10,8){\vector(0,1){5}}
           \put(40,8){\vector(0,1){5}}
           \put(11,6){\vector(1,1){3}}
           \put(11,14){\vector(1,-1){3}}
           \put(39,14){\vector(-1,-1){3}}
           \put(39,6){\vector(-1,1){3}}
      % labels
          \put(7,10){\makebox(0,0){$\Pp$}}
          \put(43,10){\makebox(0,0){$\Qq$}}
          \put(25,11){\makebox(0,0){$\TT$}}
          \put(25,6){\makebox(0,0){$\Aa$}}
          \put(25,16){\makebox(0,0){$\widetilde{\Aa}$}}
          \put(13,7){$\bPhi$}
          \put(13,12){$\widetilde{\bPhi}$}
          \put(35,7){$\bPsi$}
          \put(35,12){$\widetilde{\bPsi}$}
  \end{picture}
}

\pagebreak[4]

\section{Flags}
The following terminology will be used in the next section.

    \begin{definition}\rm \label{flag}
    A \jdef{flag} in a vector space  $\VV$ is an
    increasing sequence of subspaces
   $$
     \{0\}=\VV_0\subseteq\VV_1\subseteq\VV_2\subseteq\cdots\subseteq\VV_n=\VV
    $$
     where $\dim(\VV_j)=j$.
     The \jdef{standard flag}
     $$
        \{0\}=\FLAG{n}{0}\subseteq\FLAG{n}{1}\subseteq\FLAG{n}{2}
     \subseteq \cdots\subseteq\FLAG{n}{n}=\F^{n\times 1}
      $$
     in $\F^{n\times 1}$ is defined by
      $$
        \FLAG{n}{k}=\Span(\IC{n}{1},\IC{n}{2},\ldots,\IC{n}{k})
      $$
     where $\IC{n}{j}=\col_j(I_n)$ is the $j$-th column of the
     $n\times n$ identity matrix. For example,
      $$
         \FLAG{3}{2}=
         \Span\left(
         \Mat{r}1\\0\\0\Rix,
         \Mat{r}0\\1\\0\Rix
         \right) =
         \left\{
         \Mat{ll}x_1\\x_2\\0\Rix\in\F^{3\times 1}:
         x_1,x_2\in\F
         \right\}.
       $$
     \end{definition}

Now any basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ for a vector
space $\VV$ determines a flag by
 $$
     \VV_k=\Span(\bphi_1,\bphi_2,\ldots,\bphi_k).
  $$
We call this the \jdef{flag determined by} the basis.
(Thus the standard basis for $\F^{n\times 1}$
determines  the standard flag.)
If $\bPhi:\F^{n\times 1}\to\VV$ is the frame
corresponding to the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
we also say that the flag is determined by the frame.
Note that
 $$
    \bPhi(\FLAG{n}{k}) = \VV_k.
  $$

Different bases can determine the same flag. For example,
if we replace each $\bphi_j$ by a non-zero multiple
of itself we do not change $\VV_k$. Our next task is
to determine when two different bases determine
the same flag.


 \begin{proposition}
    Two bases determine the same flag if and only if the
    transition matrix $P$ from one to the other preserves the
standard flag i.e. if and only if
     $$
        P\FLAG{n}{k}=\FLAG{n}{k}
     $$
for $k=1,2,\ldots,n$.
 \end{proposition}

\Proof{} Let $\bPhi$ and $\widetilde{\bPhi}$ be two frames
for $\VV$ which determine the same flag and let
$P\in\F^{n\times n}$ be the transition matrix from
$\bPhi$ to $\widetilde{\bPhi}$. Thus
 $$
   \widetilde{\bPhi}^{-1}\circ\bPhi: \F^{n\times 1}\to \F^{n\times 1}
 $$
 and
 $$
   \widetilde{\bPhi}^{-1}(\bPhi(X)) = PX
 $$
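for $X\in\F^{n\times 1}$. Since
 $$
    \bPhi(\FLAG{n}{k}) =\widetilde{\bPhi}(\FLAG{n}{k})
  $$
we conclude that
 $$
    P\FLAG{n}{k}=\FLAG{n}{k}
  $$
for $k=1,2,\ldots,n$.
Conversely, if $P\FLAG{n}{k}=\FLAG{n}{k}$ for $k=1,2,\ldots,n$, then
$\bPhi(\FLAG{n}{k})=\widetilde{\bPhi}(P\FLAG{n}{k})
=\widetilde{\bPhi}(\FLAG{n}{k})$,
so the two frames determine the same flag. \QED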


\section{Normal Forms}
  We are already accustomed to the idea that to solve
  a problem involving a matrix we should transform it
  to an equivalent problem involving a simpler matrix.
  {\em Simpler} generally means having a special form
  where many of the entries vanish. We can now express
  this idea in a new way: {\em To solve a problem
  involving a linear map we should choose frames so that
  the matrix representation is simple.}
  A matrix in  \jdef{normal form} is one which is
  simple (according to some notion of {\em simple}).

   Our purpose in this section is to understand what
   frames give normal forms.
   Most of these definitions are familiar
   (diagonal, reduced row echelon form etc.); some
   are new and will be used later on. The pattern
   in each case is the same: first we state
   (or restate) the definition of the simple form
   in matrix theoretic language, then we give an equivalent
   formulation in terms of the standard basis and flag, and
   finally we apply the  Representation Theorem~\ref{REP-thm}
   to say when a matrix representation has the simple form.

\begin{notation}\rm\label{notation}
   Throughout we will use the notations
    \begin{eqnarray*}
     \VV_k &=& \Span(\bphi_1,\bphi_2,\ldots,\bphi_k)\\
     \WW_k &=& \Span(\bpsi_1,\bpsi_2,\ldots,\bpsi_k)
  \end{eqnarray*}
  for the (elements of the) flags determined by $\bPhi$ and $\bPsi$
  respectively as well as the notation
   $\FLAG{n}{k}$ for the standard flag introduced in Definition~\ref{flag}.
Recall also that
 $$
      \IC{n}{j}= \col_j(I_n)
 $$
denotes the $j$th column of the $n\times n$
identity matrix $I_n$.
Also for $A\in\F^{m\times n}$, and subspaces  $V\subseteq\F^{n\times 1}$
and  $W\subseteq\F^{m\times 1}$
$A(V)\subseteq\F^{m\times 1}$
denotes the image of $V$
and
$A^{-1}(W)\subseteq\F^{n\times 1}$
denotes the preimage of $W$
under the matrix map corresponding to $A$, i.e.
$$
A(V)= \{AX\in \F^{m\times 1}: X\in V\}.
$$
and
$$
A^{-1}(W)= \{X\in \F^{n\times 1}: AX\in W\}.
$$
By Theorems~\ref{range-subspace} and~\ref{nullspace-subspace}, these
are again subspaces.
\end{notation}


\subsection{Zero-One Normal Form}\label{subsec:0-1}
A matrix $D\in\F^{m\times n}$
 is in \jdef{zero-one normal form} iff
 $$
    D=\Mat{ll}
       I_r               & 0_{r\times (n-r)}\\
       0_{(m-r)\times r} & 0_{(m-r)\times (n-r)}
    \Rix
  $$
where $I_r$ is the $r\times r$ identity matrix.
Here's how to say this definition in the language of this chapter.

\begin{proposition}
The matrix $D\in\F^{m\times n}$ is in zero-one normal form
iff
$$
   \begin{array}{lll}
       D\IC{n}{j}=&\IC{m}{j} &\mbox{ for $j=1,2,\ldots,r$;}\\
       D\IC{n}{j}=& 0 &\mbox{ for $j=r+1,r+2,\ldots,n$.}
   \end{array}
$$
where $\IC{n}{j}$ is as in~\ref{notation}.
\end{proposition}

For example, the matrix
 $$
    D= \Mat{llll}
         1&0&0&0\\
         0&1&0&0\\
         0&0&0&0\\
       \Rix
 $$
satisfies
 $$
    D\IC{4}{1}=\IC{3}{1},\;\;D\IC{4}{2}=\IC{3}{2},\;\;
    D\IC{4}{3}=D\IC{4}{4}=0.
 $$

 \begin{corollary}
The matrix representing the linear map $\TT:\VV\to \WW$
in the frames $\bPhi$ and $\bPsi$ is in zero-one normal form iff
there is a number $r\le n,m$ such that
$$
   \begin{array}{lll}
     \TT(\bphi_j) =&\bpsi_j &\mbox{ for $j=1,2,\ldots,r$;}\\
     \TT(\bphi_j) =&\0     &\mbox{ for $j=r+1,r+2,\ldots,n$.}
   \end{array}
$$
  \end{corollary}


 \begin{theorem}\label{REP-ZO}
   For any linear map $\TT:\VV\to\WW$
there are frames $\bPhi$ and $\bPsi$ such that the
matrix representing $\TT$ in the frames $\bPhi$ and
$\bPsi$ is in zero-one normal form.
 \end{theorem}

\Proof{}
Let $(\bphi_{r+1},\bphi_{r+2},\ldots,\bphi_n)$ be a basis
for  $\NULLSP(\TT)$ and extend it to a basis
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$ for $\VV$.
(Thus $r=n-\dim\NULLSP(\TT)$.) For $j=1,2,\ldots,r$
let $\bpsi_j=\TT(\bphi_j)$.
We claim that
$(\bpsi_1,\bpsi_2,\ldots,\bpsi_r)$ is a basis
for the range $\Range(\TT)$ of $\TT$.
We must verify three things:

 \begin{description}
  \item[(1)] $\bpsi_j\in\Range(\TT)$ for $j=1,2,\ldots,r$.
  \item[(2)] $\Range(\TT)=\Span(\bpsi_1,\bpsi_2,\ldots,\bpsi_r)$.
  \item[(3)] The sequence $(\bpsi_1,\bpsi_2,\ldots,\bpsi_r)$
     is independent.
 \end{description}
Part~(1) is immediate from the definition of the range and
the fact that $\bpsi_j=\TT(\bphi_j)$.
For part~(2) choose $\ww\in\Range(\TT)$.
Then $\ww=\TT(\vv)$ for some $\vv\in\VV$. As
$(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is a basis for $\VV$ there
are numbers $x_1,x_2,\ldots,x_n$ with
 $$
     \vv=\sum_{j=1}^nx_j\bphi_j.
  $$
Hence, since $\TT(\bphi_j)=\0$ for $j>r$,
 \begin{eqnarray*}
     \ww &=& \TT(\vv) \\
         &=& \TT\left(\sum_{j=1}^nx_j\bphi_j \right) \\
         &=& \sum_{j=1}^n x_j\TT(\bphi_j) \\
         &=& \sum_{j=1}^r x_j\TT(\bphi_j) \\
         &=& \sum_{j=1}^r x_j\bpsi_j.
 \end{eqnarray*}
For part~(3) assume that the numbers $y_1,y_2,\ldots,y_r$
satisfy
 $$
    \sum_{j=1}^r y_j\bpsi_j=\0;
  $$
we must show they vanish.
Let
$$
\uu=\sum_{j=1}^r y_j\bphi_j \eqno{(i)}
$$
so that
\begin{eqnarray*}
    \TT(\uu) &=& \TT\left(\sum_{j=1}^r y_j\bphi_j\right) \\
           &=& \sum_{j=1}^r y_j\TT(\bphi_j) \\
           &=& \sum_{j=1}^r y_j \bpsi_j \\
           &=& \0
\end{eqnarray*}
so $\uu\in\NULLSP(\TT)$. Hence there are numbers
$y_{r+1},y_{r+2},\ldots,y_n$ with
$$
    \uu = \sum_{j=r+1}^n y_j \bphi_j. \eqno{(ii)}
$$
Combining~(i) and~(ii) gives
 $$
     \sum_{j=1}^r y_j\bphi_j - \sum_{j=r+1}^n y_j \bphi_j =\0
 $$
so the coefficients $y_j$ vanish as $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
is a basis for $\VV$.

   Now extend $(\bpsi_1,\bpsi_2,\ldots,\bpsi_r)$ to a basis
$(\bpsi_1,\bpsi_2,\ldots,\bpsi_m)$ for $\WW$. The conclusion
of the theorem follows immediately from the previous corollary. \QED


\subsection{Row Echelon Form}\label{subsec:ref}

An $m\times n$ matrix $R$ is in \jdef{row echelon form}
iff
 \begin{description}
   \item[(1)]
    All the rows which vanish identically (if any) appear below
    the other (non-zero) rows.
    \item[(2)]
      The  leading entry in any row appears to the left of the leading
      entry of any non-zero row below.
 \end{description}
 (Here the \jdef{leading entry} in any row is the first non-zero
 entry in that row.)
 Here's how to say this definition in the language of this chapter.

 \begin{proposition}
The matrix $R\in\F^{m\times n}$ is in row echelon form
iff there are indices
$j_0=0<1\le j_1<j_2<\cdots<j_r<j_{r+1}=n+1$ such that
$$
   R\bigl(\FLAG{n}{j}\bigr) = \FLAG{m}{i}\;\;\;\mbox{ for $j_i \le j < j_{i+1}$}
$$
for $i=0,1,2,\ldots,r$. (See~\ref{notation}.)
The leading entry in the $i$-th row occurs in the $j_i$-th column.
 \end{proposition}


For example, if $a_1a_2a_3\ne 0$, then the matrix
$$
 R =
    \Mat{ccccccc}
      0 & a_1    & b_1 & c_{12}    & b_2 & c_{13} & c_{14} \\
      0 & 0      & a_2 & c_{22}    & b_3 & c_{23} & c_{24} \\
      0 & 0      & 0   &  0        & a_3 & c_{33} & c_{34} \\
      0 & 0      & 0   &  0        & 0   & 0      & 0      \\
      0 & 0      & 0   &  0        & 0   & 0      & 0
    \Rix
$$
is in row echelon form with $j_1=2$, $j_2=3$, $j_3=5$, since
 $ R\FLAG{7}{1}=\FLAG{5}{0}$,
 $R\FLAG{7}{2}=\FLAG{5}{1}$,
 $R\FLAG{7}{3}=R\FLAG{7}{4}=\FLAG{5}{2}$,
 $R\FLAG{7}{5}=R\FLAG{7}{6}=R\FLAG{7}{7}=\FLAG{5}{3}$.
The leading entries are $a_1$, $a_2$, $a_3$.

By the Representation Theorem~\ref{REP-thm},
the matrix representing the linear map $\TT:\VV\to \WW$
in the frames $\bPhi$ and $\bPsi$ is in Row Echelon Form
iff there are indices $j_0=0<1\le j_1<j_2<\cdots<j_r<j_{r+1}=n+1$ such that
$$
   \TT(\VV_j) = \WW_i\;\;\;\mbox{ for $j_i \le j < j_{i+1}$}
$$
for $i=0,1,2,\ldots,r$
where
 \begin{eqnarray*}
      \VV_j &=& \Span(\bphi_1,\bphi_2,\ldots,\bphi_j) \\
      \WW_i &=& \Span(\bpsi_1,\bpsi_2,\ldots,\bpsi_i)
 \end{eqnarray*}
are the flags determined by the frames $\bPhi$ and $\bPsi$.

\subsection{Reduced Row Echelon Form} \label{subsec:rref}
   An $m\times n$ matrix $R$ is in \jdef{reduced row echelon form}
iff it is in row echelon form and, in addition, satisfies
 \begin{description}
    \item[(3)]
     The leading entry in any non-zero row is a $1$,
    \item[(4)]
      All other entries in the column of a leading entry are $0$.
 \end{description}
 Here's how to say this definition in the language of this chapter.

 \begin{proposition}
A matrix $R\in\F^{m\times n}$ is in reduced row echelon form
iff there are indices $j_0=0<1\le j_1<j_2<\cdots<j_r<j_{r+1}=n+1$ such that
 $$
  \begin{array}{ll}
     R\IC{n}{j_i} = \IC{m}{i} &\mbox{ for $i=1,2,\ldots,r$,} \\
     R\IC{n}{j}\in\FLAG{m}{i} &\mbox{ for $j_i < j <  j_{i+1}$, $i=0,1,\ldots,r$.}
    \end{array}
 $$
(See~\ref{notation}.)
 \end{proposition}

For example, the matrix
$$
 R =
    \Mat{ccccccc}
      0 & 1      & 0 &  c_{12} & 0 & c_{13} & c_{14} \\
      0 & 0      & 1 &  c_{22} & 0 & c_{23} & c_{24} \\
      0 & 0      & 0 &  0      & 1 & c_{33} & c_{34} \\
      0 & 0      & 0 &  0      & 0 & 0      & 0      \\
      0 & 0      & 0 &  0      & 0 & 0      & 0
    \Rix
$$
is in reduced row echelon form since
with $j_1=2$, $j_2=3$, $j_3=5$, we have
 \begin{eqnarray*}
 R\IC{7}{1} &=& 0 \in \FLAG{5}{0}  \\
 R\IC{7}{2} &=& \IC{5}{1} \\
 R\IC{7}{3} &=& \IC{5}{2} \\
 R\IC{7}{4} &=& c_{12}\IC{5}{1}+c_{22}\IC{5}{2}\in\FLAG{5}{2} \\
 R\IC{7}{5} &=& \IC{5}{3} \\
 R\IC{7}{6} &=& c_{13}\IC{5}{1}+c_{23}\IC{5}{2}+c_{33}\IC{5}{3}\in \FLAG{5}{3} \\
 R\IC{7}{7} &=& c_{14}\IC{5}{1}+c_{24}\IC{5}{2}+c_{34}\IC{5}{3}\in \FLAG{5}{3}
 \end{eqnarray*}

\begin{corollary}
The matrix representing the linear map $\TT:\VV\to \WW$
in the frames $\bPhi$ and $\bPsi$ is in reduced row echelon form
iff there are indices $j_0=0<1\le j_1<j_2<\cdots<j_r<j_{r+1}=n+1$ such that
$$
    \begin{array}{ll}
       \TT(\bphi_{j_i})= \bpsi_i &\mbox{ for $i=1,2,\ldots,r$,}  \\
       \TT(\bphi_j)\in \WW_i    &\mbox{ for $j_i < j <j_{i+1}$, $i=0,1,\ldots,r$,}
    \end{array}
$$
where $\WW_i=\Span(\bpsi_1,\bpsi_2,\ldots,\bpsi_i)$.
\end{corollary}

 \begin{theorem}\label{REP-RREF}
  For any $\TT:\VV\to\WW$ and frame $\bPhi:\F^{n\times 1}\to\VV$
there is a frame $\bPsi:\F^{m\times 1}\to\WW$
such that the matrix representing $\TT$ in the frames $\bPhi$ and $\bPsi$
is in reduced row echelon form.
\end{theorem}

\Proof{}
   The indices $j_1,j_2,\ldots,j_r$ are precisely those
values of $j$ for which
$$
 \TT(\bphi_j)\notin  \TT(\VV_{j-1}).\eqno{(\sharp)}
$$
The fact that the $j_i$-th column of the representing matrix
must be $\IC{m}{i}$, the $i$-th column of the identity forces
us to define $\bpsi_i$ by  the equation
 $$
 \bpsi_i=\TT(\bphi_{j_i}).  \eqno{(\flat)}
 $$
Then the sequence $(\bpsi_1,\ldots,\bpsi_r)$ is independent
since
$$
    \bpsi_i \notin \Span(\bpsi_1,\bpsi_2,\ldots,\bpsi_{i-1})
$$
by definition.
Extend this sequence to a basis $(\bpsi_1,\ldots,\bpsi_m)$ of $\WW$.
\QED

 \begin{corollary}\label{RREF-unique}\sloppy
The matrix of Theorem~\ref{REP-RREF} is unique.
 \end{corollary}

\Proof{} Here's what the statement means.
Assume that $\bPsi$ and $\widetilde{\bPsi}$ are two  frames
for $\WW$,  that $R\in\F^{m\times n}$ is the matrix
representing $\TT$ in the frames $\bPhi$ and $\bPsi$, and that
$\widetilde{R}\in\F^{m\times n}$ is the matrix
representing $\TT$ in the frames $\bPhi$ and $\widetilde{\bPsi}$.
The corollary asserts that if both $R$ and $\widetilde{R}$ are
in reduced row echelon form, then $R=\widetilde{R}$.
But this is clear from the proof of the RREF Theorem:
equations~$(\sharp)$ and~$(\flat)$ determine
$\bpsi_1,\bpsi_2,\ldots,\bpsi_r$ uniquely. We are free
to extend the basis in any way we like, but this will not
affect the matrix representing $\TT$ since
$(\bpsi_1,\bpsi_2,\ldots,\bpsi_r)$ is  a basis
for  the range $\Range(\TT)$ of $\TT$. \QED


\subsection{Diagonalization}\label{subsec:diag}
A square matrix $D\in\F^{n\times n}$ is called \jdef{diagonal}
iff $\entry_{ij}(D)=0$ for $i\ne j$, that is, iff all the off-diagonal
entries  vanish.  Here's how to say this definition in the
language of this chapter.


 \begin{proposition}
  A matrix $D\in\F^{n\times n}$ is diagonal iff
  the columns $\IC{n}{j}$ of the standard basis satisfy
   $$
      D\IC{n}{j}=\lambda_j\IC{n}{j}
    $$
  for $j=1,2,\ldots,n$,
  where $\lambda_j=\entry_{jj}(D)$.
(See~\ref{notation}.)
 \end{proposition}


A number $\lambda$ is called an \jdef{eigenvalue} of
a linear map
$\TT:\VV\to\VV$ iff there is a non-zero vector $\vv\in\VV$ such that
 $$
   \TT(\vv) = \lambda\vv.
 $$
Any vector $\vv$ satisfying this equation is called an
\jdef{eigenvector} for the eigenvalue $\lambda$.

\begin{corollary}\label{DIAGONALIZE}
 Let $\TT:\VV\to \VV$
be a linear map from $\VV$ to itself,
$(\bphi_1,\ldots,\bphi_n)$ be a basis for $\VV$,
and $\bPhi:\F^{n\times 1}\to\VV$ be the corresponding
frame.
The matrix representing $\TT$ in the frame $\bPhi$
is diagonal
iff the vectors $\bphi_j$ are eigenvectors
of $\TT$:
$$
    \TT(\bphi_j)=\lambda_j\bphi_j \eqno{(\natural)}
$$
 for $j=1,2,\ldots,n$.
\end{corollary}

\begin{definition}\rm\label{def:diagonalizable}
When $\TT$ and $\bPhi$
 are related by equation~$(\natural)$, we say
 that $\bPhi$ \jdef{diagonalizes} $\TT$.
A linear map $\TT$ is called  \jdef{diagonalizable}
iff there is a frame which diagonalizes it
and a square matrix $A$ is called diagonalizable
iff the corresponding matrix map is, i.e. iff
there is an invertible matrix $P$
such that $P^{-1}AP$ is diagonal.
\end{definition}

\subsection{Triangular Matrices} \label{subsec:tri}
 A  square matrix $B$
is \jdef{triangular}
iff  all the entries  below the diagonal vanish,
i.e. $\entry_{ij}(B)=0$ for $i>j$. For example, the
matrix
  $$
   \Mat{lll}a&b&c\\ 0&d&e\\ 0&0& f \Rix
   $$
  is  triangular.
 Here's how to say this definition in the language of this chapter.


  \begin{proposition}\label{prop:tri-matrix}
    A matrix $B\in\F^{n\times n}$ is  triangular iff
     $$
        B\bigl(\FLAG{n}{k}\bigr)\subseteq\FLAG{n}{k}
     $$
    for $k=1,2,\ldots,n$.
    A matrix $B\in\F^{n\times n}$ is invertible and   triangular iff
     $$
        B\bigl(\FLAG{n}{k}\bigr)=\FLAG{n}{k}
     $$
    for $k=1,2,\ldots,n$.
(See~\ref{notation}.)
  \end{proposition}

  \Proof{}
Since $\IC{n}{k}\in\FLAG{n}{k}$
the set inclusion means that
$$
 \col_k(B) = B\IC{n}{k} = \sum_{i=1}^k b_{ik}\IC{n}{i}
$$
where $b_{ik}=\entry_{ik}(B)$. This  says
that $\entry_{ik}(B)=0$ for $i>k$, that is,
that $B$ is triangular.
If $B$ is invertible and triangular,
then $B\bigl(\FLAG{n}{k}\bigr)$ and $\FLAG{n}{k}$ have the same
dimension and so must be equal.
If $B$ is not invertible, then $B\bigl(\FLAG{n}{n}\bigr)\ne\FLAG{n}{n}$.
\QED


   \begin{corollary}
    The matrix representing the linear  map $\TT:\VV\to\VV$
    in the frame $\bPhi$ is triangular iff
     $$
        \TT(\VV_k)\subseteq\VV_k
      $$
     for $k=1,2,\ldots,n$ where
$$
 \VV_j = \Span(\bphi_1,\bphi_2,\ldots,\bphi_j)
$$
is the flag determined by the frame $\bPhi$.
   \end{corollary}



\subsection{Strictly Triangular Matrices}

  A matrix $N\in\F^{n\times n}$ is called \jdef{strictly triangular} iff
  $\entry_{ij}(N)=0$ for $i\ge j$, that is, iff all its entries
   on or below the diagonal vanish.
   For example, the matrix
  $$
   \Mat{lll}0& a&b\\ 0&0&c\\ 0&0& 0 \Rix
   $$
  is strictly triangular.
 Here's how to say this definition in the language of this chapter.


   \begin{proposition}
  A matrix $N\in\F^{n\times n}$ is strictly triangular iff
   $$
N\bigl(\FLAG{n}{k}\bigr)\subseteq\FLAG{n}{k-1}
$$
for $k=1,2,\ldots,n$.
(See~\ref{notation}.)
  \end{proposition}

  \Proof{} Exercise. \QED

   \begin{corollary}
    The matrix representing the linear  map $\Nn:\VV\to\VV$
    in the frame $\bPhi$ is strictly triangular iff
     $$
        \Nn(\VV_k)\subseteq\VV_{k-1}
      $$
    for $k=1,2,\ldots,n$  where
$$
 \VV_j = \Span(\bphi_1,\bphi_2,\ldots,\bphi_j)
$$
is the flag determined by the frame $\bPhi$.
   \end{corollary}





\section{Exercises} %Matrix Representation

 \begin{exercise}\rm\Amark\ In each of the following you are given
 vector spaces $\VV$ and $\WW$, frames
 $\bPhi:\F^{n\times 1}\to\VV$ and $\bPsi:\F^{m\times 1}\to\WW$,
 and a linear map $\TT:\VV\to\WW$.
 Find the matrix $A\in\F^{m\times n}$ which
 represents the map $\TT$ in the frames
$\bPhi$ and $\bPsi$.
 \begin{description}
  \item[(1)] $\VV=\Poly_2(\F)$, $\WW=\Poly_1(\F)$,
  $\bPhi(X)(\xy)=x_1+x_2\xy+x_3\xy^2$,
  $\bPsi(Y)(\xy)=y_1+y_2\xy$, $\TT(f)=f'$.
  \item[(2)] $\VV$, $\WW$, $\bPhi$, $\bPsi$ as in~(1),
      $\TT(f)(\xy)= (f(\xy+h)-f(\xy))/h$.
  \item[(3)] $\VV=\Cos_2(\F)$, $\WW=\Sin_1(\F)$,
  $\bPhi(X)(\xy)=x_1+x_2\cos(\xy)+x_3\cos(2\xy)$,
  $\bPsi(Y)(\xy)=y_1\sin(\xy)+y_2\sin(2\xy)$, $\TT(f)=f'$.
  \item[(4)] $\VV$,  $\bPhi$  as in~(1),
        $\WW=\F^{1\times 3}$, $\bPsi(Y)=Y\tr$,
      $$
 \TT(f)= \Mat{rrr}f(0)&f(1)&f(2)\Rix.
$$
  \end{description}
    Here $x_j=\entry_j(X)$ and $y_i=\entry_i(Y)$.
 \end{exercise}


 \begin{exercise}\rm\Amark\ In each of the following you are given
 a vector space $\VV$,
 a frame $\bPhi:\F^{n\times 1}\to\VV$,
 and a linear map $\TT:\VV\to\VV$ from $\VV$ to itself.
 Find the matrix $A\in\F^{n\times n}$
which  represents the map $\TT$ in the frame $\bPhi$.
\begin{description}
 \item[(1)] $\VV=\Poly_2(\F)$,  $\bPhi(X)(\xy)=x_1+x_2\xy+x_3\xy^2$,
            $\TT(f)=f'$.
 \item[(2)] $\VV$ and $\bPhi$ as in~(1),
            $\TT(f)(\xy)= (f(\xy+h)-f(\xy))/h$.
 \item[(3)] $\VV=\Trig_1(\F)$, $\bPhi(X)(\xy)=x_1+x_2\cos(\xy)+x_3\sin(\xy)$,
            $\TT(f)=f'$.
 \item[(4)] $\VV$ and  $\bPhi$  as in~(3),
            $\TT(f)(\xy)= (f(\xy+h)-f(\xy))/h$.
  \end{description}
Here $x_j=\entry_j(X)$.
\end{exercise}




 \begin{exercise}\rm\Amark\
  What is the dimension of the vector space
$\LMAP(\VV,\WW)$ of linear maps from $\VV$ to $\WW$?
\ifanswer Assume $n=\dim(\VV)$ and $m=\dim(\WW)$ and
let $\bPhi:\F^{n\times 1}\to\VV$ and
$\bPsi:\F^{m\times 1}\to\WW$ be frames. Then there
is an isomorphism
 $$
    \F^{m\times n}\to\LMAP(\VV,\WW): A\mapsto \bPsi\circ\Aa\circ\bPhi^{-1}
 $$
(where $\Aa$ is the matrix map determined by $A$) so
 $$
   \dim(\LMAP(\VV,\WW)) = \dim(\F^{m\times n}) = mn.
 $$
\fi
 \end{exercise}


\begin{exercise}\rm Let
$$
\begin{array}{ll}
 \bphi_1(\xy)=1       &  \bpsi_1(\xy)=(\xy-2)(\xy-3)/2 \\
 \bphi_2(\xy)=\xy    &  \bpsi_2(\xy)=-(\xy-1)(\xy-3) \\
 \bphi_3(\xy)=\xy^2  &  \bpsi_3(\xy)=(\xy-1)(\xy-2)/2
\end{array}
$$
\noindent Each of the sequences
$(\bphi_1,\bphi_2,\bphi_3)$ and
$(\bpsi_1,\bpsi_2,\bpsi_3)$
is a basis  for $\Poly_2(\F)$.
Find the transition matrix  from
$(\bpsi_1,\bpsi_2,\bpsi_3)$ to
$(\bphi_1,\bphi_2,\bphi_3)$.
Find the transition matrix  from
$(\bphi_1,\bphi_2,\bphi_3)$ to
$(\bpsi_1,\bpsi_2,\bpsi_3)$.
\end{exercise}

 \begin{exercise}\rm
  Let $(\bphi_1,\bphi_2,\bphi_3,\bphi_4,\bphi_5)$ be a basis
  for a vector space $\VV$. Find the transition matrix
  from this basis to the basis $(\bphi_3,\bphi_5,\bphi_2,\bphi_1,\bphi_4)$.
 \end{exercise}


\begin{exercise}\rm
In each of the following,  you are given a linear map
$\TT:\VV\to\WW$ and frames $\bPhi:\F^{n\times 1}\to\VV$
and $\bPsi:\F^{m\times 1}\to\WW$. Find the matrix $A$ representing
$\TT$ in the frames $\bPhi$ and $\bPsi$. Also say if $\TT$ is one-one
and if it is onto.

\begin{description}
\item[(1)] $\VV=\Poly_3(\F)$, $\WW=\Poly_2(\F)$, $\TT(f)=f'$,
  $\bphi_j(\xy)=\xy^{j-1}$ for $j=1,2,3,4$,
  $\bpsi_i(\xy)=\xy^{i-1}$ for $i=1,2,3$.

\item[(2)]
   $\VV=\Poly_3(\F)$, $\WW=\F^{1\times 3}$,
$\TT(f) = \Mat{lll}f(1)&f(2)&f(3)\Rix$,
$\bphi_j(\xy)=\xy^{j-1}$ for $j=1,2,3,4$,
$\bpsi_i=\row_i(I_3)$.

\item[(3)] $\VV=\F^{3\times 1}$, $\WW=\F^{2\times 1}$,
$\TT(X)=\Mat{r}3x_1+x_3\\x_2+6x_3\Rix$,
$\bphi_j=\col_j(I_3)$, $\bpsi_i=\col_i(I_2)$. (Here $x_j=\entry_j(X)$.)

\item[(4)] $\VV=\F^{3\times 1}$, $\WW=\F^{2\times 1}$,
$\TT(X)=\Mat{r}3x_1+x_3\\x_2+6x_3\Rix$,
$\bphi_j=\col_j(P)$, $\bpsi_i=\col_i(Q)$, where
 $$
   P=\Mat{rrr}1&2&3\\ 4&5& 6\\ 0 & 0 & 1\Rix,\;\;
   Q = \Mat{rr}2&1\\1&1\Rix.
 $$

\item[(5)] \sloppy $\VV=\Cos_n(\F)$, $\WW=\Sin_n(\F)$, $\TT(f)=f'$,
   $\bphi_j(\xy)=\cos((j-1)\xy)$, $\bpsi_k(\xy)=\sin(k\xy)$.

\item[(6)]
$\VV=\{\Mat{lll}x&y&z\Rix:x+2y+3z=0\}$,
$\WW=\F^{1\times 2}$,
$\TT(\Mat{lll}x&y&z\Rix)=
\Mat{ll}x&y\Rix$,
$\bphi_1=\Mat{lll}-3&0&1\Rix$,
$\bphi_2=\Mat{lll} 0&-3&2\Rix$,
$\bpsi_1=\Mat{ll}1&0\Rix$,
$\bpsi_2=\Mat{ll}0&1\Rix$.

\item[(7)]
$\VV=\Poly_3(\F)$,
$\WW=\Poly_2(\F)$,
$ \TT(f)(\xy) = f'(\xy+1)$,
$\bphi_j(\xy)=\xy^{j-1}$,
$\bpsi_j(\xy)=\xy^{j-1}$.
\end{description}
 \end{exercise}


 \begin{exercise}\rm
  For each of the maps $\TT:\VV\to\WW$ of the previous problem
find a frame $\Tilde{\bPsi}:\F^{m\times 1}\to\WW$ such that the
matrix representing $\TT$ in the frames $\bPhi$ and $\Tilde{\bPsi}$
is in reduced row echelon form.
 \end{exercise}

 \begin{exercise}\rm
  For each of the maps $\TT:\VV\to\WW$ of the previous problem
find  frames $\Tilde{\bPhi}:\F^{n\times 1}\to\VV$ and
$\Tilde{\bPsi}:\F^{m\times 1}\to\WW$
such that the matrix representing $\TT$
in the frames $\Tilde{\bPhi}$ and $\Tilde{\bPsi}$
is in zero-one normal form.
 \end{exercise}

\begin{exercise}\rm Let $\TT:\Poly_3(\F)\to\F^{1\times 3}$ be defined by
$$
     \TT(f) = \Mat{ccc} f(1) & f'(1) & f(1)\Rix.
$$
\begin{description}
\item[(1)] Find a basis for the null space
of $\TT$ and extend it to a basis for $\Poly_3(\F)$.
\item[(2)] Find a basis for the range
of $\TT$ and extend it to a basis
for $\F^{1\times 3}$.
\item[(3)] Find the matrix representing $\TT$ in these frames.
\end{description}
Is $\TT$ one-one? onto?
\end{exercise}

 \begin{exercise}\rm In each of the following,
you are given a linear map $\TT:\VV\to\VV$
and a frame $\bPhi:\F^{n\times 1}\to\VV$. Find the matrix
$A$ representing the map $\TT$ in the frame $\bPhi$.
 \begin{description}
   \item[(1)] $\VV=\Poly_n(\F)$, $\TT(f)(\xy)=f(\xy+a)$, $\bphi_j(\xy)=\xy^{j-1}$.
   \item[(2)] $\VV=\Trig_n(\F)$, $\TT(f)(\xy)=f(\xy+a)$,
    $\bphi_j(\xy)=e^{(n+1-j)i\xy}$.
   \item[(3)] $\VV=\Poly_n(\F)$, $\TT(f)(\xy)=f'(\xy)$, $\bphi_j(\xy)=\xy^{j-1}$.
   \item[(4)] $\VV=\Trig_n(\F)$, $\TT(f)(\xy)=f'(\xy)$,
    $\bphi_j(\xy)=e^{(n+1-j)i\xy}$.
   \item[(5)] $\VV=\Poly_n(\F)$, $\TT(f)(\xy)=f''(\xy)$, $\bphi_j(\xy)=\xy^{j-1}$.
   \item[(6)] $\VV=\Trig_n(\F)$, $\TT(f)(\xy)=f''(\xy)$,
    $\bphi_j(\xy)=e^{(n+1-j)i\xy}$.
 \end{description}
 \end{exercise}

 \begin{exercise}\rm
For each of the maps $\TT:\VV\to\VV$ of the previous
problem, find its eigenvalues and eigenvectors.
 \end{exercise}


 \begin{exercise}\rm\Amark\ \label{ex:diag}
   Suppose that $\VV$ is a vector space of dimension $n$
and that the linear map $\TT:\VV\to\VV$ has $n$ distinct eigenvalues.
Show there is a basis of $\VV$ consisting of eigenvectors of $\TT$.
Hint: The key point is that the sequence of
eigenvectors is independent. This can be proved by assuming
a linear relation and applying $f(\TT)$ for various
polynomials $f(t)$. See Exercise~\ref{ex:Lagrange}.
 \end{exercise}


\begin{exercise}\rm\label{ex:nodiag}
Show that the matrix
$$
      N=\Mat{cc} 0 & 1\\ 0 & 0 \Rix
$$
is not diagonalizable, i.e. there is no
invertible matrix $P$ such that $P^{-1}NP$
is a diagonal matrix.
\end{exercise}

 \begin{exercise}\rm
Define $\TT:\Poly_n(\F)\to\Poly_n(\F)$ by
 $$
   \TT(f)(\xy)=f(\xy+b)
 $$
where $b$ is a constant. Find the eigenvalues of $\TT$.
Is $\TT$ diagonalizable? Hint: Find the matrix representing
$\TT$ in the standard basis $\bphi_j(\xy)=\xy^{j-1}$. If you
can't do the general case try the case $n=1$ first.
 \end{exercise}


 \begin{exercise}\rm
Define $\TT:\Poly_n(\F)\to\Poly_n(\F)$ by
 $$
   \TT(f)(\xy)=f(b\xy)
 $$
where $b$ is a constant. Find the eigenvalues of $\TT$.
Is $\TT$ diagonalizable?
 \end{exercise}

 \begin{exercise}\rm
Define $\TT:\Trig_n(\F)\to\Trig_n(\F)$ by
 $$
   \TT(f)(\xy)=f(\xy+b)
 $$
where $b$ is a constant. Find the eigenvalues of $\TT$.
Is $\TT$ diagonalizable?
 \end{exercise}


 \begin{exercise}\rm The matrix
 $$
   A=\Mat{cccc}
    0 & a_{12} & a_{13} & a_{14} \\
    0 & 0      & a_{23} & a_{24} \\
    0 & 0      & 0      & a_{34} \\
    0 & 0      & 0      & 0
\Rix
 $$
satisfies $\entry_{ij}(A)=0$ for $j<i+1$ and the matrix
 $$
   B=\Mat{cccc}
    0 & 0 & b_{13} & b_{14} \\
    0 & 0 & 0      & b_{24} \\
    0 & 0 & 0      & 0 \\
    0 & 0 & 0      & 0
\Rix
 $$
satisfies $\entry_{jk}(B)=0$ for $k<j+2$.
Compute $AB$ and conclude that it satisfies
$\entry_{ik}(AB)=0$ for $k<i+3$.
 \end{exercise}

  \begin{exercise}\rm
    A square matrix $A\in\F^{n\times n}$ is called
    $p$-\jdef{triangular} iff
 $$
    \entry_{ij}(A)=0 \mbox{ for $j<i+p$}.
 $$
 Thus the terms {\em $0$-triangular} and {\em triangular} are synonymous,
 and the terms  {\em $1$-triangular} and {\em strictly triangular}
are synonymous.
 Show that if $A$ is a $p$-triangular matrix and $B$ is
 $q$-triangular, then $AB$ is $(p+q)$-triangular.
 Hint: You can, of course, simply calculate $\entry_{ik}(AB)$
 and show that it is zero for $k<i+p+q$. However, it is more
 elegant to express the property of being $p$-triangular
 in terms of the standard flag.
  \end{exercise}


 \begin{exercise}\rm\label{REP-stn}
  A matrix $N\in\F^{n\times n}$ is called \jdef{nilpotent}
  iff $N^p=0$ for some positive integer $p$. Show that
  a strictly triangular matrix $N$ is nilpotent.
 \end{exercise}

 \begin{exercise}\rm
 Let $U=I-N$ where $I=I_3$ is the $3\times 3$ identity matrix and
 $$
   N=\Mat{ccc}0&a&b\\0&0&c\\0&0&0 \Rix.
 $$
Show that $N^3=0$ and $U^{-1}=I+N+N^2$.
\end{exercise}



 \begin{exercise}\rm
A square matrix $U$ is called \jdef{unipotent} iff it is the sum
of the identity matrix and a nilpotent matrix.
Show that a unipotent matrix is invertible.
(Hint: Factor $I-N^n$ to find a formula for the inverse
of $U=I-N$.)
\ifanswer   Let $U\in\F^{n\times n}$ be unipotent. Then
 $$
   U=I-N
 $$
where $I=I_n$ is the identity matrix and $N$ is nilpotent.
The identity
 $$
   I-N^n=(I-N)(I+N+N^2+\cdots+N^{n-1})
 $$
holds for matrices as well as for numbers. But $N^n=0$ so this formula
says
 $$
   I=UV
 $$
where
 $$
   V=I+N+N^2+\cdots+N^{n-1}.
 $$
Similarly $VU=I$ so $U$ is invertible and $V=U^{-1}$.
\fi
\end{exercise}



     \begin{exercise}\rm \label{ex:uni-triangular}
   Call a square matrix \jdef{uni-triangular} iff
it is triangular and all its diagonal entries are one.
Show that a uni-triangular matrix is invertible.
\ifanswer A matrix is uni-triangular iff it is the sum of
an identity matrix with a strictly triangular matrix.
Hence it is unipotent and thus invertible. Alternatively,
note that a uni-triangular matrix is triangular with
non-zero entries on the diagonal and use the
Triangular Invertibility Theorem \frompage{tri-inv}.
\fi
\end{exercise}


 \begin{exercise}\rm\Amark\
 A triangular matrix $A\in\F^{3\times 3}$ may be written as
$A=DU$ where
 $$
   A=\Mat{ccc}a&b&c\\ 0&d&e \\ 0&0&f \Rix,\;\;\;
   D=\Mat{ccc}a&0&0\\ 0&d&0 \\ 0&0&f \Rix,\;\;\;
   U=\Mat{rrr}
         1 & a^{-1}b & a^{-1}c\\ 0 & 1 & d^{-1}e   \\ 0 & 0 & 1
      \Rix.
 $$
Find $A^{-1}$. (Don't forget that $(DU)^{-1}=U^{-1}D^{-1}$.)
\ifanswer Using the formula $A^{-1} = (DU)^{-1} =U^{-1}D^{-1}$ we obtain
  \begin{eqnarray*}
   A^{-1} &=&
    \Mat{lll}
      1   &-  a^{-1}b    &  (ad)^{-1}be-a^{-1}c  \\
      0   &\sig 1        &  -d^{-1}e \\
      0   &\sig 0        &  \sig 1
    \Rix
   \Mat{ccc}
       a^{-1}      &0       &0\\
       0           &d^{-1}  &0 \\
       0           &0       &f^{-1}
    \Rix\\
   &=&
    \Mat{lll}
      a^{-1}   &  -  (ad)^{-1}b    &     (adf)^{-1}be-(af)^{-1}c  \\
      0   &d^{-1}    &        -(df)^{-1}e \\
      0   &0    & f^{-1}
    \Rix.
 \end{eqnarray*}
\fi
 \end{exercise}

 \begin{exercise}\rm
  Suppose that $A$ is invertible and triangular.
  Show  that $A=DU$
  where $D$ is invertible diagonal  and
  $U$ is uni-triangular.
  Use this to find a formula for $A^{-1}$.
 \end{exercise}


\begin{exercise}[Important]\rm
Let $\TT:\VV\to\WW$ be a linear map between
finite dimensional vector spaces.
Show that $\TT$ is one-one if and only if $\TT^*$ is onto
and  that $\TT$ is onto if and only if $\TT^*$ is one-one.
(See Exercises~\ref{exr:dualspace} and~\ref{exr:dualmap}.)
\end{exercise}


\begin{exercise}[Important]\rm\label{exer:rank}
Let $\Aa:\VV_1\to\WW_1$ and $\Bb:\VV_2\to\WW_2$ be linear maps between
finite dimensional vector spaces. Say that
$\Aa$ and $\Bb$ are \jdef{equivalent} iff
there exist isomorphisms $\Pp:\VV_2\to\VV_1$ and
$\Qq:\WW_2\to\WW_1$ such that
$$
    \Aa = \Qq\circ\Bb\circ\Pp^{-1}.
$$
Show that $\Aa$ and $\Bb$ are equivalent if and only if
$\VV_1$ and $\VV_2$ have the same dimension,
$\WW_1$ and $\WW_2$ have the same dimension,
and
$\Aa$ and $\Bb$ have the same rank.
\end{exercise}

\chapter{Block Diagonalization}

Not every square matrix can be diagonalized.
In this chapter we will see that
every square matrix can be ``block diagonalized''.

\section{Direct Sums}

   Let $\VV$ be a vector space. The notation
    $$
       \VV=\WW\oplus\UU
     $$
 says that $\VV$ is the \jdef{direct sum} of $\WW$ and $\UU$.
 This means that $\WW$ and $\UU$ are subspaces of $\VV$ and that
 for every $\vv\in\VV$ there are unique  $\ww\in\WW$ and $\uu\in\UU$
 such that
  $$
     \vv=\ww+\uu.
   $$
More generally,
the notation
 $$
    \VV=\VV_1\oplus\VV_2\oplus\cdots\oplus\VV_m
 $$
means that the spaces $\VV_i$ ($i=1,2,\ldots,m$)
are subspaces of $\VV$ and
for every $\vv\in\VV$ there are unique vectors
$\vv_i\in\VV_i$ such that
  $$
     \vv=\vv_1+\vv_2+\cdots+\vv_m.
  $$
Another notation for the direct sum, analogous to the sigma notation for
ordinary sums,  is
 $$
   \VV=\bigoplus_{j=1}^m \VV_j.
 $$
When $\VV=\bigoplus_{j=1}^m \VV_j$ we say the subspaces
$\VV_j$ give a \jdef{direct sum decomposition} of $\VV$.
When $\VV=\WW\oplus\UU$,
one says that the subspace $\UU$ of $\VV$ is a \jdef{complement}
to the subspace $\WW$ in the vector space $\VV$.

To prove the equation $\VV=\WW\oplus\UU$ we must show four things:
 \begin{description}
  \item[(1)] $\WW$ is a subspace of $\VV$.
  \item[(2)] $\UU$ is a subspace of $\VV$.
  \item[(3)] $\VV=\WW+\UU$ which means that  every $\vv\in\VV$ has form
             $\vv=\ww+\uu$ for  some $\ww\in\WW$ and $\uu\in\UU$.
  \item[(4)] $\WW\cap\UU=\{\0\}$ which means that the only $\vv\in\VV$
             which is in both $\WW$ and $\UU$ is $\vv=\0$.
 \end{description}

 \begin{remark}[Uniqueness Remark]\rm \label{uniq-rmk}
 Part~(4) relates to the uniqueness of the decomposition.
If $\ww_1,\ww_2\in\WW$ and $\uu_1,\uu_2\in\UU$ satisfy
  $$
      \ww_1+\uu_1=\ww_2+\uu_2,
   $$
 then $\ww_1-\ww_2=\uu_2-\uu_1\in\WW\cap\UU$.
Then part~(4) implies that $\ww_1-\ww_2=\uu_2-\uu_1=\0$, that is,
that $\ww_1=\ww_2$ and $\uu_1=\uu_2$,
so that the representation is unique.
On the other hand, if part~(4) fails, then there is a non-zero
$\vv\in\WW\cap\UU$. Then $\0\in\VV$ has two distinct representations,
$\0=\0+\0$ and  $\0=\vv+(-\vv)$,
as the sum of an element of $\WW$ and an element of $\UU$,
so that the representation is {\em not} unique.

 The first thing to understand is that a subspace has many complements.
 For example, take $\VV=\F^{2\times 1}$ and let $\WW$ be the horizontal axis:
  $$
     \WW =\left\{
   \left[\begin{array}{c}x_1\\ 0\end{array}\right]:
         x_1\in\F\right\}.
   $$
Then for any $b\in\F$ the space
 $$
    \UU = \left\{
    \left[\begin{array}{c} bx_2\\x_2\end{array}\right]:
          x_2\in\F\right\}
  $$
is a complement to $\WW$ since any $X\in\VV=\F^{2\times 1}$ can be
decomposed as
   $$
      \left[\begin{array}{c} x_1 \\ x_2\end{array}\right] =
      \left[\begin{array}{c} x_1-bx_2\\ 0 \end{array}\right] +
      \left[\begin{array}{c} bx_2 \\ x_2\end{array}\right].
    $$
Note that different values of $b$ give different complements
$\UU$ to $\WW$. Geometrically, {\em any line through the origin
and distinct from $\WW$ is a complement to $\WW$ in $\VV=\F^{2\times 1}$.}

\begin{figure}
\setlength{\unitlength}{0.1in}
 \begin{picture}(50,30)
     \put(5,10){\line(1,0){40}} \put(2,9.5){$\WW$}
     \put(7,4){\line(1,2){13}}  \put(6,2){$\UU$}
     \thicklines
     \put(10,10){\vector(1,0){10}} \put(20,8){$\ww$}
     \put(10,10){\vector(1,2){5}}  \put(13,20){$\uu$}
     \put(10,10){\vector(3,2){15}} \put(27,20){$\vv=\ww+\uu$}
 \end{picture}
\caption{$\VV=\WW\oplus\UU$}
\end{figure}

 \end{remark}




 \begin{proposition}\label{c-b}
   Let $\VV$ be a vector space and $\WW,\UU\subseteq\VV$
be subspaces of $\VV$. Suppose that
 \begin{description}
   \item[(1)] $(\bphi_1,\bphi_2,\ldots,\bphi_m)$ is a basis for $\WW$,
   \item[(2)] $(\bphi_{m+1},\bphi_{m+2},\ldots,\bphi_n)$ is a basis for $\UU$,
   \item[(3)] $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ is a basis for $\VV$.
 \end{description}
Then $\VV=\WW\oplus\UU$.
 \end{proposition}


\Proof{} To show that $\VV=\WW+\UU$ choose $\vv\in\VV$.
By~(3) there are numbers $x_1,x_2,\ldots,x_n$ with
 $$
   \vv=\sum_{j=1}^n x_j\bphi_j.
 $$
Then $\vv=\ww+\uu$ where
 $$
   \ww=\sum_{j=1}^m x_j\bphi_j,\;\;\;
   \uu=\sum_{j=m+1}^n x_j\bphi_j.
 $$
By~(1) we have that $\ww\in\WW$ and by~(2) we have that $\uu\in\UU$.
To show that $\WW\cap\UU=\{\0\}$ choose $\vv$ in this intersection.
Then by~(1) and~(2) there are numbers $x_1,x_2,\ldots,x_n$ with
 $$
   \vv=\sum_{j=1}^m x_j\bphi_j=\sum_{j=m+1}^n x_j\bphi_j.
 $$
Hence
 $$
   \0=\sum_{j=1}^m x_j\bphi_j-\sum_{j=m+1}^n x_j\bphi_j.
 $$
so $x_1=x_2=\cdots=x_n=0$ by~(3). Hence $\vv=\0$.\QED

 \begin{corollary}
   Let $\WW$ be a subspace of $\VV$.
To find a complement $\UU$ to $\WW$ in $\VV$ proceed as follows:
 \begin{description}
   \item[-] Find a basis $(\bphi_1,\bphi_2,\ldots,\bphi_m)$ for $\WW$.
   \item[-] Extend it to  a basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ for $\VV$.
   \item[-] Define $\UU=\Span(\bphi_{m+1},\bphi_{m+2},\ldots,\bphi_n)$.
 \end{description}
 \end{corollary}

 \begin{corollary}
   Suppose $\VV=\WW\oplus\UU$ with $\dim(\VV)=n$ and $\dim(\WW)=m$.
Then there is a frame
$$
\bPhi:\F^{n\times 1}\to\VV
$$
such that
 \begin{eqnarray*}
  \bPhi^{-1}(\WW) &=& \{X\in\F^{n\times 1}:x_{m+1}=x_{m+2}=\cdots=x_n=0\}\\
  \bPhi^{-1}(\UU) &=& \{X\in\F^{n\times 1}:x_1=x_2=\cdots=x_m=0\}.
 \end{eqnarray*}
 \end{corollary}

For each pair $(m,n)$ of integers with $0\le m\le n$ there is a
\jdef{standard direct sum}
 $$
     \F^{n\times 1} =\WW_m^n\oplus\UU_m^n
 $$
where
 $$
   \WW_m^n = \left\{
           \left[\begin{array}{l} X_1 \\ 0 \end{array}\right]:
           X_1\in\F^{m\times 1},\;\; 0=0_{(n-m)\times 1}
           \right\},
 $$
 $$
   \UU_m^n = \left\{
             \left[\begin{array}{l} 0 \\ X_2 \end{array}\right]:
           X_2\in\F^{(n-m)\times 1},\;\; 0=0_{m\times 1}
           \right\}.
 $$
The decomposition of $X\in\F^{n\times 1}$ into an element
of $\WW_m^n$ and an element of $\UU_m^n$ is given by
 $$
   \left[\begin{array}{c}X_1 \\ X_2 \end{array}\right]=
   \left[\begin{array}{c}X_1 \\ 0   \end{array}\right]+
   \left[\begin{array}{c} 0 \\X_2   \end{array}\right].
 $$
The corollary says that any direct sum decomposition
is isomorphic to a standard one: If $\VV=\WW\oplus\UU$,
then there is a frame $\bPhi$ for $\VV$ with
 $$
   \WW=\bPhi(\WW_m^n),\;\; \UU=\bPhi(\UU_m^n).
 $$

\section{Idempotents}

 \begin{definition}\rm An \jdef{idempotent}
on a vector space $\VV$ is a linear map
 $$
   \bPi:\VV\to \VV
 $$
 from $\VV$ to itself which is its own square:
 $$
   \bPi\circ\bPi=\bPi.
 $$
A square matrix $\Pi\in\F^{n\times n}$ is called an
\jdef{idempotent} iff the corresponding matrix
map is an idempotent, that is, iff $\Pi^2=\Pi$.
The word {\em idempotent} means {\em same power} and comes
from the obvious fact that for an idempotent we have
 $$
   \bPi^p=\bPi
 $$
for all positive integers $p$.
We also call a square matrix $\Pi\in\F^{n\times n}$
an \jdef{idempotent} if the corresponding matrix map is
an idempotent, that is, if $\Pi^2=\Pi$.
\end{definition}

The simplest examples of idempotent matrices
are square matrices in zero-one diagonal form. Thus the matrix
 $$
   \Pi = \left[\begin{array}{ll}
           I_r                & 0_{r\times(n-r)} \\
           0_{(n-r)\times r} & 0_{(n-r)\times (n-r)}
         \end{array}\right]
 $$
satisfies $\Pi^2=\Pi$ so the corresponding matrix map is an idempotent.
Note that
 $$
   \Pi = D\tr D
 $$
where
 $$
   D = \left[\begin{array}{ll}
           I_r               & 0_{r\times(n-r)}
         \end{array}\right],\;\;
   D\tr = \left[\begin{array}{l}
           I_r                \\
           0_{(n-r)\times r}
         \end{array}\right].
 $$
Of course, if $\Pi$ is an idempotent, and $P\in\F^{n\times n}$
is invertible, then $P\Pi P^{-1}$ is an idempotent. This is because
 \begin{eqnarray*}
   (P\Pi P^{-1})^2 &=& P\Pi P^{-1}P\Pi P^{-1}\\
                   &=& P\Pi^2  P^{-1} \\
                   &=& P\Pi  P^{-1}.
 \end{eqnarray*}

 \begin{remark}\rm A map $\bPi:\VV\to\VV$ is an idempotent
iff its range is its fixed point set, that is, iff
 $$
   \Range(\bPi) = \{\ww\in\VV: \bPi(\ww)=\ww\}.
 $$
\rm Indeed, this equation clearly implies that
$\bPi^2(\vv)=\bPi(\vv)$ for $\vv\in\VV$ since
$\ww=\bPi(\vv)\in\Range(\bPi)$. Conversely,
any fixed point is clearly in the range:
if $\ww=\bPi(\ww)$, then $\ww\in\Range(\bPi)$,
and, if $\bPi^2=\bPi$, then any vector
$\ww=\bPi(\vv)\in\Range(\bPi)$ in the range is a fixed point.
\end{remark}


 \begin{theorem}[Direct Sums and Idempotents]
 There is a one-one   onto correspondence between
the set of idempotents on $\VV$ and
the set of direct sum decompositions
$\VV=\WW\oplus\UU$ of $\VV$.
The idempotent $\bPi$ and the direct sum decomposition
$\VV=\WW\oplus\UU$ \jdef{correspond} iff
 \begin{eqnarray*}
   \WW &=& \Range(\bPi), \\
   \UU &=& \NULLSP(\bPi),
 \end{eqnarray*}
that is, $\WW$ and $\UU$ are range and null space of $\bPi$
respectively.
 \end{theorem}

\Proof{} Exercise. Do Exercise~\ref{ex:product} first.

 \begin{question}\rm
   What is the idempotent corresponding to the direct sum
   decomposition in the example (with $\VV=\F^{2\times 1}$)
   after Remark~\ref{uniq-rmk}?
   (Answer: The matrix (map  determined by)
    $\Pi=\left[\begin{array}{rr} 1 & -b \\ 0 & 0 \end{array}\right]$.)
 \end{question}

 \begin{proposition}
   Suppose $\VV=\WW\oplus\UU$ and let $\bPi$ be the
corresponding idempotent:
 $$
   \WW=\Range(\bPi),\;\; \UU=\NULLSP(\bPi).
 $$
Then $\II-\bPi$ is an idempotent and the
corresponding direct sum decomposition is
$\VV=\UU\oplus\WW$:
 $$
   \UU = \Range(\II-\bPi),\;\; \WW=\NULLSP(\II-\bPi).
 $$
Here $\II=\II_\VV$ is the identity map of $\VV$.
 \end{proposition}

\Proof{} Note that
 $$
   (\II-\bPi)\circ\bPi = \bPi-\bPi^2=\bPi-\bPi=\0
 $$
so
 $$
   (\II-\bPi)^2= (\II-\bPi)
 $$
which shows that $\II-\bPi$ is an idempotent. For the rest
note that
 \begin{eqnarray*}
   \ww\in\Range(\bPi)
      &\iff& \bPi(\ww)=\ww \\
      &\iff& (\II-\bPi)(\ww)=\0 \\
      &\iff& \ww\in\NULLSP(\II-\bPi)
 \end{eqnarray*}
so that $\Range(\bPi)=\NULLSP(\II-\bPi)$ and similarly
(reading $\II-\bPi$ for $\bPi$)
$\Range(\II-\bPi)=\NULLSP(\bPi)$.\QED






Two idempotents $\bPi_1$ and $\bPi_2$ of $\VV$
are called \jdef{disjoint} iff
$
\bPi_1\circ\bPi_2 = \bPi_2\circ\bPi_1 = \0.
$
A \jdef{splitting} of $\VV$ is
a sequence of pairwise disjoint idempotents
of $\VV$ which sum to the identity. Thus
a given sequence $\bPi_1,\bPi_2,\ldots,\bPi_m$
of linear maps from $\VV$ to itself is  a splitting iff
it satisfies
 \begin{description}
   \item[(1)] $\II= \bPi_1+\bPi_2+\cdots+\bPi_m$,
   \item[(2)] $\bPi_i\circ\bPi_j = \0$  for $i\ne j$,
   \item[(3)] $\bPi_i^2=\bPi_i$  for $i=1,2,\ldots,m$.
 \end{description}
where $\II=\II_\VV$ is the identity map of $\VV$.



 \begin{theorem}[Decompositions and Splittings]
There is a one-one   onto correspondence between
direct sum decompositions and splittings.
The direct sum decomposition $\VV=\bigoplus_{i=1}^m\VV_i$
and the splitting $\II=\sum_{i=1}^m\bPi_i$
\jdef{correspond} iff
$$
   \VV_i=\Range(\bPi_i)
$$
for $i=1,2,\ldots,m$.
 \end{theorem}


\Proof{} Three things are asserted.
 \begin{description}
   \item[(i)] If $\II=\sum_i\bPi_i$ is a splitting
              and $\VV_i=\Range(\bPi_i)$, then
              $\VV=\bigoplus_{i=1}^m\VV_i$.
   \item[(ii)] Every direct sum decomposition arises  this way.
   \item[(iii)] If $\II=\sum_i\bPi_i^{(1)}$ and $\II=\sum_i\bPi_i^{(2)}$
                are splittings and
                $\Range(\bPi_i^{(1)})=\Range(\bPi_i^{(2)})$ for $i=1,2,\ldots,m$,
                then $\bPi_i^{(1)}=\bPi_i^{(2)}$ for $i=1,2,\ldots,m$.
 \end{description}

\Proof{ of~(i)} We show that any  $\vv\in\VV$
has a unique decomposition
$$
   \vv=\vv_1+\vv_2+\cdots+\vv_m \eqno{(\heartsuit)}
$$
with $\vv_i\in\Range(\bPi_i)$. Condition~(1) gives the existence
of this decomposition: we simply define $\vv_i=\bPi_i(\vv)$.
Conditions~(2) and~(3) give the uniqueness of the decomposition.
To see this, apply $\bPi_i$ to~$(\heartsuit)$. We obtain
 $$
   \bPi_i(\vv)=\bPi_i(\vv_i)
 $$
by~(2) and hence
 $$
   \bPi_i(\vv)=\vv_i
 $$
by~(3).

\Proof{ of~(ii)} Define
 $$
   \bPi_i(\vv) = \vv_i
 $$
where $\vv_1,\vv_2,\ldots,\vv_m$ are defined by~$(\heartsuit)$.
The maps $\bPi_i$ are well-defined since the
decomposition~$(\heartsuit)$ is unique.
The reader can check that the maps $\bPi_i$ are
linear and satisfy conditions~(1)-(3).

\Proof{ of~(iii)} If the decomposition $\VV=\bigoplus_i\VV_i$
and the splitting $\II=\sum_i\bPi_i$ correspond, then
 \begin{eqnarray*}
   \bPi_i(\vv) &=& \vv \mbox{ for $\vv\in\VV_i$}\\
                &=& \0  \mbox{ for $\vv\in\VV_j$, $i\ne j$.}
 \end{eqnarray*}
These conditions determine $\bPi_i$ uniquely since every $\vv\in\VV$
is a sum of elements in the various $\VV_j$.
\QED



A sequence of square matrices of the same size, say $n\times n$,
is called a \jdef{splitting} of $I_n$ iff the corresponding
sequence of matrix maps is a splitting of $\F^{n\times 1}$.
Thus the sequence $(\Pi_1,\Pi_2,\ldots,\Pi_m)$ is a splitting
of $I_n$ iff $\Pi_i\in\F^{n\times n}$ for $i=1,2,\ldots,m$ and
 \begin{description}
   \item[(1)] $I= \Pi_1+\Pi_2+\cdots+\Pi_m$,
   \item[(2)] $ \Pi_i\Pi_j = 0$  for $i\ne j$,
   \item[(3)] $ \Pi_i^2=\Pi_i$  for $i=1,2,\ldots,m$.
 \end{description}
where $I=I_n$ is the $n\times n$ identity matrix.

It is easy to make examples.
For any sequence
 $$
   \nu=(n_1,n_2,\ldots,n_m)
 $$
of positive integers which sums to $n$:
 $$
   n_1+n_2+\cdots+n_m=n,
 $$
we define the \jdef{standard splitting} of
$I_n$ determined by $\nu$ by the equations
\begin{eqnarray*}
   \entry_{jj}(\Pi_i) &=& 1 \mbox{ for $s_{i-1}<j\le s_i$}\\
                      &=& 0 \mbox{ for $j\le s_{i-1}$ or $s_i<j$}\\
   \entry_{kj}(\Pi_i) &=& 0 \mbox{ for $k\ne j$}
\end{eqnarray*}
where
 $$
   s_i =n_1+n_2+\cdots+n_i
 $$
(with $s_0=0$).
For example, with $n=8$, $m=4$, and $\nu=(3,2,2,1)$ we have
 \begin{eqnarray*}
   \Pi_1 &=& \diag(1,1,1,0,0,0,0,0)\\
   \Pi_2 &=& \diag(0,0,0,1,1,0,0,0)\\
   \Pi_3 &=& \diag(0,0,0,0,0,1,1,0)\\
   \Pi_4 &=& \diag(0,0,0,0,0,0,0,1)
 \end{eqnarray*}

There are many other splittings of $I_n$ besides the
standard ones: given one splitting we can make another via
 $$
   I_n=Q\Pi_1 Q^{-1}+Q\Pi_2Q^{-1}+\cdots+Q\Pi_mQ^{-1}.
 $$
\section{Invariant Decomposition}

  Let $\TT:\VV\to\VV$ be a linear map from a vector space
to itself. A subspace $\WW\subseteq\VV$ is called $\TT$-\jdef{invariant}
iff $\TT(\WW)\subseteq\WW$. A direct sum decomposition
 $
   \VV= \bigoplus_{i=1}^m \VV_i
 $
is called $\TT$-\jdef{invariant} iff each of the summands
$\VV_i$ is $\TT$-invariant, that is, iff
 $$
   \TT(\VV_i)\subseteq\VV_i
 $$
for $i=1,2,\ldots,m$. A splitting
$
  \II=\sum_{i=1}^m\bPi_i
$
is called $\TT$-\jdef{invariant} iff the corresponding direct sum
decomposition is.

 \begin{proposition}[Invariance Theorem]\label{PROJ-inv}
  Let $\TT:\VV\to\VV$ be a linear map from a vector space
  $\VV$ to itself. Then a splitting
   $$
      \II=\bPi_1+\bPi_2+\cdots+\bPi_m
    $$
   is $\TT$-invariant if and only if $\TT$ commutes with
   each of the summands:
    $$
       \TT\circ\bPi_i=\bPi_i\circ\TT
     $$
   for $i=1,2,\ldots,m$.
 \end{proposition}

\Proof{} Assume the commutation equations;
we prove that $\TT(\VV_i)\subseteq\VV_i$.
We need the fact that
$$
\vv\in\VV_i \iff \bPi_i(\vv)=\vv.
$$
Choose $\ww\in\VV_i$. Then
$$
 \bPi_i(\TT(\ww)) = \TT(\bPi_i(\ww))= \TT(\ww).
$$
This shows that $\TT(\ww)\in\VV_i$ as required.
The converse is just as easy.
If $\TT(\VV_i)\subseteq\VV_i$, then certainly
$$
  \TT\circ\bPi_i(\vv)=\bPi_i\circ\TT(\vv)
$$
for $\vv\in\VV_i$ since both sides equal $\TT(\vv)$.
Similarly this holds for $\vv\in\VV_j$ with $j\ne i$ since
then both sides are $\0$.
This means that it must hold for all $\vv\in\VV$
since every $\vv$ is a sum
$\vv=\ww_1+\ww_2+\cdots+\ww_m$ with $\ww_j\in\VV_j$, where the formula
is true for each $\vv=\ww_j$. \QED

 \begin{example}\rm
Let $\VV=\F^{2\times 1}$
and
 $$
   \VV_1=
\left\{\left[\begin{array}{c}x_1\\ 0 \end{array}\right]: x_1\in\F\right\},\;\;
   \VV_2=
\left\{\left[\begin{array}{c} 0 \\ x_2 \end{array}\right]: x_2\in\F\right\},
 $$
and let $\TT(X)=AX$ be the matrix map corresponding to the matrix
$A\in\F^{2\times 2}$ given by
 $$
   A=\left[\begin{array}{cc}a_{11}&a_{12} \\ a_{21}& a_{22}\end{array}\right].
 $$
Then $\VV_1$ is $\TT$-invariant iff $a_{21}=0$ and the
decomposition $\VV=\VV_1\oplus\VV_2$ is $\TT$-invariant iff
$a_{12}=a_{21}=0$. The splitting corresponding to this direct sum
decomposition is  given by (the matrix maps determined by) the matrices
 $$
   \Pi_1=\left[\begin{array}{rr}1 & 0 \\ 0 & 0 \end{array}\right],\;\;
   \Pi_2=\left[\begin{array}{rr}0 & 0 \\ 0 & 1 \end{array}\right].
 $$
Note that
 $$
 \Pi_1A = \left[\begin{array}{ll} a_{11}& a_{12}\\ 0 & 0\end{array}\right],\;\;
  A\Pi_1= \left[\begin{array}{ll} a_{11}& 0 \\ a_{21} & 0 \end{array}\right],
 $$
so that $\Pi_1A=A\Pi_1$ iff $a_{12}=a_{21}=0$.
 \end{example}



\section{Block Diagonalization}
An invariant direct sum decomposition should
be viewed as a generalization of diagonalization.
We now explain this point. Let
 $$
   \VV=\VV_1\oplus\VV_2\oplus\cdots\oplus\VV_m
 $$
be a direct sum decomposition of the vector space $\VV$.
Given any linear maps
 $$
   \TT_i:\VV_i\to\VV_i
 $$
from the $i$-th summand of a direct sum decomposition to itself,
there is a unique map $\TT:\VV\to\VV$ from
 $\VV$ to itself characterized by the following two properties:
 \begin{description}
   \item[(1)] $\TT(\ww)=\TT_i(\ww)$ for $\ww\in\VV_i$, $i=1,2,\ldots,m$;
   \item[(2)] The decomposition $\VV=\bigoplus_{i=1}^m\VV_i$
              is $\TT$-invariant.
 \end{description}
We express these conditions with the formula:
$$
   \TT=\TT_1\oplus\TT_2\oplus\cdots\oplus\TT_m.
$$
This formula establishes a one-one   onto correspondence
between two sets: the set of all linear maps $\TT$
for which the direct sum decomposition $\VV=\bigoplus_i\VV_i$
is $\TT$-invariant and the set of all sequences
$(\TT_1,\TT_2,\ldots,\TT_m)$ of linear maps with
$\TT_i:\VV_i\to\VV_i$ for $i=1,2,\ldots,m$.
We call $\TT_i$ the \jdef{restriction} of $\TT$ to the
invariant summand $\VV_i$.


Here is a similar notation for matrices.
If $A_i\in\F^{n_i\times n_i}$ for $i=1,2,\ldots,m$
and $n=n_1+n_2+\cdots+n_m$,
then the notation
$$
    A = \diag(A_1,A_2,\ldots, A_m)
$$
means that $A\in\F^{n\times n}$ is the \jdef{block diagonal} matrix
 $$
    A=\left[\begin{array}{cccc}
          A_1 &     &         & \\
              & A_2 &         & \\
              &     & \ddots  & \\
              &     &         & A_m
    \end{array}\right]
  $$
with the indicated blocks on the diagonal. (The blank entries denote $0$.)
Thus, for example, if
 $$
    A_1 = \left[\begin{array}{cc} a&b\\ c&d \end{array}\right],\;\;
    A_2=\left[\begin{array}{c}e\end{array}\right],
  $$
  then
   $$
      \diag(A_1, A_2) =
      \left[\begin{array}{ccc}a&b&0\\ c&d&0\\ 0&0&e\end{array}\right].
    $$
The relation between these concepts is given by

 \begin{theorem}[Block Representation]\label{PROJ-block}
  Assume that a direct sum decomposition is $\TT$-invariant.
  Then the matrix representing $\TT$ in any basis which
  respects this decomposition is block diagonal.
 \end{theorem}


\Proof{}  The assertion that the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$
respects the direct sum decomposition means that for each $i$ the subsequence
$$
(\bphi_{s_{i-1}+1},\bphi_{s_{i-1}+2},\ldots,\bphi_{s_i}) \eqno{(\clubsuit_i)}
$$
is a basis for the summand $\VV_i$.
For $s_{i-1}<k\le s_i$ we have $\bphi_k\in\VV_i$.
Hence $\TT(\bphi_k)\in\VV_i$ by $\TT$-invariance.
Since $(\clubsuit_i)$ is a basis for $\VV_i$ we obtain
 $$
   \TT(\bphi_k) = \sum_{j=s_{i-1}+1}^{s_i} a_{jk}\bphi_j\eqno{(\sharp)}
 $$
where $a_{jk}=\entry_{jk}(A)$ and $A$ represents
$\TT$ in the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$.
The equations~$(\sharp)$ show that $A$ is block diagonal
since they assert that $\entry_{jk}(A)=0$ unless
$j$ and $k$ lie in the same block of integers
$s_{i-1}+1,s_{i-1}+2,\ldots,s_i$. Note that
$A_i$ represents the linear map
$\TT_i:\VV_i\to\VV_i$ in the basis $(\clubsuit_i)$.
\QED


\section{Eigenspaces}
   Let $\TT:\VV\to\VV$ be a linear map from a vector space
$\VV$ to itself. For each $\lambda\in\F$ let
$\EIG_\lambda(\TT)$ be the subspace of $\VV$ defined by
 $$
   \EIG_\lambda(\TT) = \{\bphi\in\VV: \TT(\bphi)=\lambda\bphi\}.
 $$
This is the null space of $\TT-\lambda\II$:
 $$
   \EIG_\lambda(\TT) = \NULLSP(\TT-\lambda\II),
 $$
where $\II=\II_\VV$ is the identity map of $\VV$.
As in Section~\ref{subsec:diag}, $\lambda$ is an \jdef{eigenvalue} of
$\TT$ iff $\EIG_\lambda(\TT)\ne\{\0\}$ and
the elements of $\EIG_\lambda(\TT)$ are the \jdef{eigenvectors}
of $\TT$ for this eigenvalue. We also call $\EIG_\lambda(\TT)$
the \jdef{eigenspace} of $\TT$ for the eigenvalue $\lambda$.

 \begin{proposition}\rm
The eigenspaces are $\TT$-invariant.
 \end{proposition}

\Proof{} $\bphi\in\EIG_\lambda(\TT)
   \implies \TT(\bphi)=\lambda\bphi
   \implies \TT^2(\bphi)=\lambda\TT(\bphi)
   \implies \TT(\bphi)\in\EIG_\lambda(\TT)$. \QED

 \begin{theorem}[Eigenspace Decomposition]\label{PROJ-EIG}
 The map $\TT$ is diagonalizable iff
 $$
   \VV= \bigoplus_\lambda \EIG_\lambda(\TT)
 $$
where the direct sum is over all eigenvalues $\lambda$ of $\TT$.
 \end{theorem}

\Proof{}
Recall (see Definition~\ref{def:diagonalizable})
that a linear map $\TT$ is called \jdef{diagonalizable} iff
there is a basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ consisting
of eigenvectors of $\TT$. Suppose that
$\mu_1,\mu_2,\ldots,\mu_m$ are the distinct eigenvalues of $\TT$
and that the indexing is chosen so that
 $$
   \TT(\bphi_j)=\mu_i\bphi_j \mbox{ for $s_{i-1}<j\le s_i$.}
 $$
Then
 $$
   \EIG_{\mu_i}(\TT) =
   \Span\left(\bphi_{s_{i-1}+1},\bphi_{s_{i-1}+2},\ldots,\bphi_{s_i}\right)
 $$
which shows both that
 $$
  \VV=
 \EIG_{\mu_1}(\TT)\oplus \EIG_{\mu_2}(\TT)\oplus\cdots\oplus \EIG_{\mu_m}(\TT)
 $$
(as required)
and that the basis $(\bphi_1,\bphi_2,\ldots,\bphi_n)$ respects
this direct sum decomposition as in Theorem~\ref{PROJ-block}.
Conversely, if this eigenspace
decomposition is valid, then any basis
which respects this decomposition will consist of eigenvectors
of $\TT$. In particular, $\TT$ will be diagonalizable.
\QED

 \begin{corollary}\label{diag-cor}
    Suppose that $\TT:\VV\to\VV$ is diagonalizable.
    Then
    $$
       \TT = \sum_{i=1}^m \mu_i\bPi_i
     $$
   where $\mu_1,\mu_2,\ldots,\mu_m$ are the distinct eigenvalues
   of $\TT$ and
    $$
       \II = \sum_{i=1}^m \bPi_i
     $$
   is the splitting corresponding to the direct sum decomposition
   $$
      \VV = \bigoplus_{i=1}^m \EIG_{\mu_i}(\TT).
    $$
  \end{corollary}

\section{Generalized Eigenspaces}
   Let $\TT:\VV\to\VV$ be a linear map from a vector space
$\VV$ to itself. For each $\lambda\in\F$ define a subspace
 $$
   \GEIG_\lambda(\TT) = \NULLSP((\TT-\lambda\II)^n).
 $$
Here $n$ is the dimension of $\VV$ and $\II=\II_\VV$ is the
identity map of $\VV$. The space $\GEIG_\lambda(\TT)$
is called the \jdef{generalized eigenspace} of $\TT$
for the eigenvalue $\lambda$ and its elements are called
\jdef{generalized eigenvectors}.

Our first step is to show that the integer $n$ in
the definition of $\GEIG_\lambda(\TT)$ may be replaced
by any integer $p\ge\dim(\VV)$ without affecting the definition.
We need the following


 \begin{lemma}
   Let $\NN:\VV\to\VV$ be a linear map and $\vv\in\VV$.
Suppose that $p$ is a positive integer with
$$
\NN^p(\vv)=\0,\;\; \NN^{p-1}(\vv)\ne\0.
$$
Then $p\le n$.
 \end{lemma}

\Proof{} By the Dimension Theorem it is enough to show
that the sequence of iterates
 $$
   (\vv,\NN(\vv),\NN^2(\vv),\ldots,\NN^{p-1}(\vv))
 $$
is independent. Suppose that
the numbers $c_0,c_1,c_2,\ldots,c_{p-1}$ satisfy
$$
  c_0\vv+c_1\NN(\vv)+c_2\NN^2(\vv)+\cdots+c_{p-1}\NN^{p-1}(\vv)=\0;
\eqno{(1)}
$$
we must show that $c_0=c_1=c_2=\cdots=c_{p-1}=0$.
Applying $\NN^{p-1}$ to~(1) gives $c_0\NN^{p-1}(\vv)=\0$
from which we conclude that $c_0=0$ so that~(1) simplifies to
$$
  c_1\NN(\vv)+c_2\NN^2(\vv)+\cdots+c_{p-1}\NN^{p-1}(\vv)=\0.
\eqno{(2)}
$$
Now we repeat the argument. Applying $\NN^{p-2}$ to~(2)
gives $c_1=0$ and so on. \QED

 \begin{corollary}
If $(\TT-\lambda\II)^p(\vv)=\0$ for some positive integer $p$,
then  $\vv$ is a generalized eigenvector.
 \end{corollary}

\Proof{} Take $\NN=\TT-\lambda\II$ in the lemma.\QED



 \begin{proposition}\label{prop:LLinvariant}
 The generalized eigenspaces are $\TT$-invariant.
 \end{proposition}

\Proof{} The equation
 $$
   \TT\circ(\TT-\lambda\II)^n(\bphi) =(\TT-\lambda\II)^n(\TT(\bphi))
 $$
implies that
 $$
   \bphi\in\GEIG_\lambda(\TT)\implies \TT(\bphi)\in\GEIG_\lambda(\TT).
 $$
\QED


Note that an ordinary eigenvector is a generalized eigenvector:
 $$
   (\TT-\lambda\II)(\bphi)=\0 \implies (\TT-\lambda\II)^n(\bphi)=\0.
 $$
(Here $\implies$ means {\em implies}.) The converse is not
true. For example, if $\VV=\F^{2\times 1}$ and $\TT$
is the matrix map corresponding to the matrix
 $$
   L=\left[\begin{array}{cc}\lambda&1\\ 0& \lambda\end{array}\right]
 $$
then $\lambda$ is the only eigenvalue of $\TT$, the eigenspace
is given by
 $$
   \EIG_\lambda(\TT) = \left\{
    \left[\begin{array}{c}x \\ 0 \end{array}\right]: x\in\F
                     \right\}
 $$
whereas every vector is a generalized eigenvector:
 $$
   \GEIG_\lambda(\TT) = \F^{2\times 1}
 $$
since
 $$
 (L-\lambda I)^2 = \left[\begin{array}{rr}0&1\\0&0\end{array}\right]^2=0.
 $$
There is however no distinction between eigenvalues and generalized
eigenvalues.

 \begin{theorem}
   The number $\lambda$ is an eigenvalue for $\TT$
  iff the corresponding generalized eigenspace $\GEIG_\lambda(\TT)$
is not the zero space:
 $$
   \EIG_\lambda(\TT)\ne\{\0\} \iff  \GEIG_\lambda(\TT)\ne\{\0\}.
 $$
 \end{theorem}

 \Proof{} One direction is easy since
  $
 \EIG_\lambda(\TT)\subseteq\GEIG_\lambda(\TT).
  $
For the converse suppose $\bphi\in \GEIG_\lambda(\TT)$
is non-zero. Then
 \begin{eqnarray*}
    (\TT-\lambda)^k(\bphi) &=&   \0 \mbox{ for $k=n$, but}\\
                          &\ne& \0  \mbox{ for $k=0$,}
 \end{eqnarray*}
so there is a largest value of $k$ with
$\bpsi=(\TT-\lambda)^{k-1}(\bphi)\ne\0$. Then
 $$
(\TT-\lambda)\bpsi= (\TT-\lambda)^k(\bphi) =\0
 $$
so $\bpsi\in\EIG_\lambda(\TT)$ and hence $\EIG_\lambda(\TT)\ne\{\0\}$
as required. \QED

 \begin{corollary}
   The only eigenvalue of the linear map
    $$
      \GEIG_\lambda(\TT)\to\GEIG_\lambda(\TT): \vv\mapsto \TT(\vv)
    $$
    is $\lambda$.
 \end{corollary}

 \Proof{} Suppose that $\bphi\in\GEIG_\lambda(\TT)$ satisfies
  $\TT(\bphi)=\mu\bphi$. Then $\bpsi=(\TT-\lambda\II)^{k-1}(\bphi)$
(from the last proof) also satisfies $\TT(\bpsi)=\mu\bpsi$.
But the last proof showed that $\TT(\bpsi)=\lambda\bpsi$
and $\bpsi\ne\0$ so $\lambda=\mu$.\QED

 \begin{question}\rm\Amark\ Show that
$$
\GEIG_\lambda(\TT)\cap\GEIG_\mu(\TT)=\{\0\}
$$
for $\lambda\ne\mu$.
(Answer: Otherwise (as in the proof)
 the intersection would contain an eigenvector for $\TT$.
The corresponding eigenvalue would be both $\lambda$ and $\mu$ which
is impossible.)
 \end{question}


 \begin{theorem}[Generalized Eigenspace Decomposition]\label{PROJ-GEIG}
Assume
$$
\F=\C
$$
the field of complex numbers.
Then any linear map
$$
\TT:\VV\to\VV
$$
has a $\TT$-invariant direct sum decomposition
 $$
   \VV= \bigoplus_\lambda \GEIG_\lambda(\TT)
 $$
where the direct sum is over all eigenvalues $\lambda$ of $\TT$.
 \end{theorem}

This theorem is an improvement over
the Eigenspace Decomposition of Theorem~\ref{PROJ-EIG} in that
it works for {\em any} linear map, not just diagonalizable ones.
We have  already proved in Proposition~\ref{prop:LLinvariant}
that the decomposition is $\TT$-invariant.
We shall postpone the rest of the proof
to the next section. For the moment
we recast this theorem in the language of matrix theory.


 \begin{theorem}[Block Diagonalization]\label{thm:blk-diag}
   Any matrix $A\in\C^{n\times n}$ is similar
   to a block diagonal matrix where each of the blocks
   has a single eigenvalue. More precisely,
   suppose $\mu_1,\mu_2,\ldots,\mu_m$ are the distinct
   eigenvalues of $A$. Then there is an invertible matrix
   $P\in\C^{n\times n}$ such that
 $$
   P^{-1}AP = \diag(B_1,B_2,\ldots,B_m)
 $$
  where the matrix $B_i-\mu_i I$ is nilpotent for  $i=1,2,\ldots,m$.
 \end{theorem}

  \Proof{} We deduce this as a corollary
of the Generalized Eigenspace Decomposition.
We take $\VV=\C^{n\times 1}$ and $\TT=\Aa$ the matrix
map determined by $A$.
Choose any basis $(P_1,P_2,\ldots,P_n)$ which respects
the Generalized Eigenspace Decomposition, that is,
 $$
   \NULLSP((A-\mu_i I)^n) =
     \Span\left(P_{s_{i-1}+1},P_{s_{i-1}+2},\ldots,P_{s_i}\right)
 $$
where $0=s_0<s_1<\cdots<s_m=n$. Define $P$ by $\col_j(P)=P_j$.
Then $P^{-1}AP$ is the matrix representing $\TT=\Aa$ in the basis
$(P_1,P_2,\ldots,P_n)$. By Theorem~\ref{PROJ-block}, this matrix is block diagonal.
Since $B_i$ is the matrix representing the restriction
to the Generalized Eigenspace $\NULLSP((A-\mu_i I)^n)$
it follows that $B_i-\mu_i I$ is nilpotent.
\QED

 \begin{remark}\rm
   We deduced the Block Diagonalization Theorem from
   the Generalized Eigenspace Decomposition but it is just
   as easy to do the reverse. Let $A$ represent $\TT$
   in any basis. By the Block Diagonalization Theorem
   $A$ is similar to $P^{-1}AP$ which is in block diagonal form.
   By Theorem~\ref{REP-Sim} there
   is a basis for $\VV$ so that the matrix $P^{-1}AP$ represents
   the map $\TT$ in this basis. The elements of this basis
   are the generalized eigenvectors  required by
   the Generalized Eigenspace Decomposition.
   We omit the details.
 \end{remark}


\section{Minimal Polynomial}
Let $\TT:\VV\to\VV$ be a linear map from
a finite-dimensional vector space $\VV$ to itself.
The space $\LMAP(\VV,\VV)$ of all linear maps
from $\VV$ to itself is a vector space of dimension
$n^2$ where $n$ is the dimension of $\VV$.
Hence for some $m\le n^2$ the sequence
 $$
    (\II,\TT,\TT^2,\TT^3,\ldots,\TT^m)
  $$
of powers of $\TT$ must be dependent. Thus
there are numbers $c_0,c_1,c_2,\ldots, c_m$,
not all zero, such that
$$
    c_0\II+c_1\TT+c_2\TT^2+\cdots+c_m\TT^m=0. \eqno{(\sharp)}
$$
Take the smallest value of $m$ for which the system
$(\sharp)$ has a non-trivial solution and form the polynomial
$$
   f(\xy)= c_0+c_1\xy+c_2\xy^2+\cdots+c_m\xy^m.
$$
Then equation~$(\sharp)$ can be written as
$$
 f(\TT)=0.
$$
Notice that since $m$ is smallest we must have $c_m\ne 0$
(else a smaller value of $m$ would work) so we can divide through
by it and assume that $c_m=1$.
The resulting polynomial is
called the \jdef{minimal polynomial}\label{minimal-def} for $\TT$.
Since $m$ is smallest, it follows that
$g(\TT)\ne 0$ for any non-zero polynomial $g$ of degree less
than $m$.

 \begin{theorem}[Minimal Polynomial Theorem]
   Assume that $\F=\C$. Then
   the eigenvalues of $\TT$ are the roots
   of the minimal polynomial $f$ of $\TT$.
  \end{theorem}

\Proof{} Choose any number $\lambda$.
Divide the polynomial
$f(\xy)$ by the polynomial $\xy-\lambda$ to obtain a quotient
$g(\xy)$ of degree $m-1$:
$$
      f(\xy) = (\xy-\lambda)g(\xy) +c
$$
Here $c$ is a number (that is, a polynomial of degree zero).
Note that $c=0$  iff $f(\lambda)=0$, that is, iff $\lambda$
is a root of $f$.

First assume that $f(\lambda)=0$. Then $c=0$ so when we substitute
$\TT$ for $\xy$ we get
$$
    \0 = f(\TT) = (\TT-\lambda \II)g(\TT).
$$
As $g(\xy)$ has smaller degree than $f(\xy)$ we have that
$g(\TT)\ne 0$.  Hence there is a $\ww\in\VV$ with
$g(\TT)(\ww)\ne \0$. Let $\vv=g(\TT)(\ww)$. Then
$$
\0= f(\TT)\ww= (\TT-\lambda \II)g(\TT)(\ww) = (\TT-\lambda\II)(\vv)
$$
which shows that $\lambda$ is an eigenvalue of $\TT$
with eigenvector $\vv$.

Conversely assume that $\lambda$ is an eigenvalue for $\TT$.
Then there is a non-zero $\vv\in\VV$ with $(\TT-\lambda\II)\vv=\0$.
Hence
$$
    \0 = f(\TT)(\vv)= g(\TT)(\TT-\lambda \II)(\vv) +c\vv=\0+c\vv=c\vv
$$
so $c=0$ and hence $f(\lambda)=0$ as required.
\QED


 \begin{corollary}[Eigenvalues Exist]\label{CEVEX}
    Assume that $\F=\C$. Then
    any linear  map $\TT:\VV\to\VV$
    has an eigenvector.
 \end{corollary}

\Proof{}
By the Fundamental Theorem of Algebra any complex polynomial
has a complex root. \QED


\begin{corollary} The minimal polynomial
$f$ of $\TT$ has the form
    $$
       f(\xy)= (\xy-\mu_1)^{p_1}(\xy-\mu_2)^{p_2}\cdots(\xy-\mu_m)^{p_m}
     $$
where $\mu_1,\mu_2,\ldots,\mu_m$ are the distinct eigenvalues of $\TT$
and the exponents $p_k$ are positive integers.
\end{corollary}


\Proof{ of Theorem~\ref{PROJ-GEIG}}
We now prove the
Generalized  Eigenspace Decomposition Theorem.
Assume that $\F=\C$, that $\VV$ is a finite dimensional
vector space, and that $\TT:\VV\to\VV$ is a linear map
from $\VV$ to itself. Let $\mu_1,\mu_2,\ldots,\mu_m$ be
the distinct eigenvalues of $\TT$ and denote by
$\VV_1,\VV_2,\ldots,\VV_m$ the corresponding generalized
eigenspaces:
 $$
    \VV_k = \GEIG_{\mu_k}(\TT)
  $$
for $k=1,2,\ldots,m$.

 Let $f_k(\xy)$ be the minimal polynomial of the linear map
  $$
     \VV_k\to\VV_k: \vv\mapsto \TT(\vv). \eqno{(\natural)}
  $$
 Let $g_k(\xy)=\prod_{j\ne k} f_j(\xy)$ be the product
 of all the $f_j(\xy)$ with $j\ne k$:
  $$
     g_k(\xy)= f_1(\xy)\cdots f_{k-1}(\xy) f_{k+1}(\xy)\cdots f_m(\xy).
  $$

 \begin{lemma} The map
  $$
     \VV_k\to\VV_k: \vv\mapsto g_k(\TT)(\vv)
  $$
  is an isomorphism, but
   $$
      g_k(\TT)(\vv)=\0 \mbox{ for $\vv\in\VV_j$ with $j\ne k$.}
   $$
  \end{lemma}

\Proof{}
In the last section we noted that the only eigenvalue of
this map is $\mu_k$ so $f_k$ must have the form
 $$
    f_k(\xy) = (\xy-\mu_k)^{p_k}.
  $$
For $j\ne k$ the map
 $$
    \VV_k\to\VV_k: \vv\mapsto (\TT-\mu_j\II)(\vv)
  $$
is an isomorphism, else $\mu_j$ would be
an eigenvalue for the map $(\natural)$.
If we raise this map to the $p_j$-th power
and then multiply the results together for $j\ne k$
we obtain the first part of the lemma (a composition of
isomorphisms is an isomorphism). The second part of the
lemma is trivial, since $f_j(\TT)(\vv)=\0$ for
$\vv\in\VV_j$ and $f_j(\xy)$ is a factor of $g_k(\xy)$.
\QED

\medskip

{\it We resume the proof of Theorem~\ref{PROJ-GEIG}.} Let
$$
   \WW = \VV_1+\VV_2+\cdots+\VV_m
$$
be the sum of all these
spaces $\VV_k$; that is, $\ww\in\WW$ if and only if
there exist vectors $\vv_k\in\VV_k$ with
$$
   \ww=\vv_1+\vv_2+\cdots+\vv_m.
$$
We must show two things:
$$
   \WW = \VV_1\oplus\VV_2\oplus\cdots\oplus\VV_m \eqno{(1)}
$$
and
$$
       \WW=\VV. \eqno{(2)}
$$

{\em We prove~(1).} Suppose that
$$
   \0=\vv_1+\vv_2+\cdots+\vv_m
$$
where $\vv_k\in\VV_k$. Apply $g_k(\TT)$ to both sides.
By the second part of the lemma $\0=g_k(\TT)(\vv_k)$.
Hence $\vv_k=\0$ by the first part of the lemma.

{\em We prove~(2).} Assume~(2) is false, that is, that $\WW\ne\VV$.
Choose any complement $\UU$ to $\WW$ in $\VV$,
 $$
    \VV=\WW\oplus\UU
 $$
and let $\biota:\UU\to\VV$ denote the inclusion and
$\bpi:\VV\to\UU$ the projection onto $\UU$ along $\WW$, i.e.
$$
   \biota(\uu) =\uu, \qquad \bpi(\uu+\ww)=\uu
$$
for $\uu\in\UU$ and $\ww\in\WW$.
Let $\lambda$ be an eigenvalue for
 $$
    \bpi\circ\TT\circ\biota:\UU\to\UU
  $$
and let $\uu\in\UU$ be the corresponding eigenvector.
Then
 \begin{eqnarray*}
  \bpi\circ\TT\circ\biota(\uu)=\lambda\uu &&\mbox{ so } \\
  \bpi(\TT(\uu)-\lambda\uu)=\0             && \mbox{ so }\\
  \TT(\uu)-\lambda\uu\in\NULLSP(\bpi)=\WW &&
 \end{eqnarray*}
where we have used $\biota(\uu)=\bpi(\uu)=\uu$ which follows
from $\uu\in\UU$. From the definition of $\WW$ we obtain
$$
\TT(\uu)-\lambda\uu = \ww_1+\ww_2+\cdots+\ww_m \eqno{(3)}
$$
where $\ww_k\in\VV_k$.

We distinguish two cases. In case $\lambda$  is not an eigenvalue
then the linear map
 $$
   \VV_k\to\VV_k: \vv\mapsto (\TT-\lambda\II)(\vv)
 $$
is invertible for each $k=1,2,\ldots,m$ so we may choose
$\vv_k\in\VV_k$ satisfying
$$
(\TT-\lambda\II)(\vv_k)=\ww_k \eqno{(4)}
$$
so~(3) may be written as
 $$
   (\TT-\lambda\II)(\uu-\vv_1-\vv_2-\cdots-\vv_m)=\0.
 $$
As $\lambda$ is not an eigenvalue, $(\TT-\lambda\II)$ is invertible
so we may cancel it in the last equation and obtain
 $$
   \uu-\vv_1-\vv_2-\cdots-\vv_m=\0.
 $$
But $\uu\ne\0$ so this contradicts
$$
 \VV=\UU\oplus\WW=\UU\oplus\VV_1\oplus\VV_2\oplus\cdots\oplus\VV_m.
\eqno{(5)}
$$

The second case is that $\lambda$ {\em is} an eigenvalue of $\TT$,
say $\lambda=\mu_1$. We may still find $\vv_k\in\VV_k$
satisfying~(4) for $k=2,3,\ldots,m$ so we may write~(3) as
 $$
   (\TT-\lambda\II)(\uu-\vv_2-\cdots-\vv_m)=\ww_1.
 $$
As $\ww_1\in\VV_1$ we obtain
 $$
   (\TT-\lambda\II)^p(\uu-\vv_2-\cdots-\vv_m)=\0
 $$
for sufficiently large $p$ and hence that
 $$
   \uu-\vv_2-\cdots-\vv_m \in \VV_1.
 $$
But this also contradicts~(5). \QED


\section{Exercises}% Block Diagonalization

 \begin{exercise}\rm Suppose that
  $\TT:\VV\to\WW$ is an isomorphism and
that $\VV=\VV_1\oplus\VV_2$. Show that
$\WW=\WW_1\oplus\WW_2$ where
$\WW_1=\TT(\VV_1)$ and $\WW_2=\TT(\VV_2)$.
 \end{exercise}

 \begin{exercise}\rm\Amark\ \label{ex:product}
  Given two vector spaces $\WW$ and $\UU$, the
  \jdef{direct product} $\WW\times\UU$ of $\WW$ and $\UU$ is the
  set of all pairs $(\ww,\uu)$ with $\ww\in\WW$
  and $\uu\in\UU$:
   $$
     \WW\times\UU=\{(\ww,\uu): \ww\in\WW,\;\uu\in\UU\}.
   $$
  We make $\WW\times\UU$  into a vector space by defining the vector
  space operations via the following rules:
 \begin{eqnarray*}
     (\ww_1,\uu_1)+(\ww_2,\uu_2) &=& (\ww_1+\ww_2,\uu_1+\uu_2)\\
     a(\ww,\uu) &=& (a\ww,a\uu)\\
      \0_{\WW\times \UU} &=& (\0_\WW,\0_\UU).
 \end{eqnarray*}
  Suppose that $\WW$ and $\UU$ are subspaces of $\VV$.
  Show that $\VV=\WW\oplus\UU$ if and only if the linear
  map
   $$
     \WW\times \UU\to\VV: (\ww,\uu)\mapsto \ww+\uu
   $$
   is an isomorphism.
 \end{exercise}

   \begin{exercise}\rm
    Let $\WW$ and $\UU$ be subspaces of a vector space $\VV$.
    Define the \jdef{sum} $\WW+\UU$
    and \jdef{intersection} $\WW\cap\UU$ of $\WW$ and $\UU$ by
     \begin{eqnarray*}
       \WW+\UU &=& \{\ww+\uu : \ww\in\WW,\;\uu\in\UU\}\\
       \WW\cap\UU &=& \{ \vv\in\VV : \vv\in\WW \mbox{ and } \vv\in\UU\}.
     \end{eqnarray*}
    Show that
     \begin{description}
       \item[(1)] $\WW+\UU$ and $\WW\cap\UU$ are subspaces of $\VV$.
       \item[(2)] $\WW+\UU=\WW\oplus\UU$ iff $\WW\cap\UU=\{\0\}$.
       \item[(3)] $\dim(\WW+\UU)+\dim(\WW\cap\UU)=\dim(\WW)+\dim(\UU)$.
     \end{description}
   \end{exercise}

    \begin{exercise}\rm\Amark\ Let $A,B\in\F^{2\times 4}$ be defined by
      \begin{eqnarray*}
       A&=& \left[\begin{array}{rrrr}1&2&3&4\\ 4&3&2&1\end{array}\right]\\
       B&=& \left[\begin{array}{rrrr}1&2&3&4\\ 3&4&1&2\end{array}\right]
      \end{eqnarray*}
     Let $\VV=\F^{4\times 1}$. Find $\WW+\UU$ and $\WW\cap\UU$
     if $\WW=\NULLSP(A)$ and $\UU=\NULLSP(B)$.
     (Here $\NULLSP$ denotes {\em null space}.)
   \ifanswer %
      \begin{eqnarray*}
        \WW+\UU    &=&
          \NULLSP(\left[\begin{array}{rrrr}1&2&3&4\end{array}\right])\\
        \WW\cap\UU &=&
          \NULLSP(C) \mbox{ where } \\
            C      &=&
        \left[\begin{array}{rrrr}1&2&3&4\\ 4&3&2&1\\ 3&4&1&2\end{array}\right]
      \end{eqnarray*}
       \fi
    \end{exercise}


 \begin{exercise}\rm
Suppose that $\TT:\VV\to\WW$ is a linear map
and that $\SS:\WW\to\VV$ is a right inverse to $\TT$:
 $$
    \TT\circ\SS=\II_\WW.
 $$
Show that $\SS\circ\TT$ is an idempotent on $\VV$ and that
the corresponding direct sum decomposition is given by
 $$
   \VV=\WW\oplus\UU
 $$
where
 \begin{eqnarray*}
   \WW &=&\Range(\SS\circ\TT)= \Range(\SS)\\
   \UU &=& \NULLSP(\SS\circ\TT) = \NULLSP(\TT).
 \end{eqnarray*}
 \end{exercise}

   \begin{exercise}\rm
    For any subset $K\subseteq\{1,2,\ldots,n\}$ define
   a matrix $\IC{n}{K}\in\F^{n\times n}$ by
    $$
      \IC{n}{K} = \diag(e_1,e_2,\ldots,e_n)
    $$
    where
     $$
       e_j = \left\{ \begin{array}{ll}
                       1 & \mbox{ if $j\in K$,}\\
                       0 & \mbox{ if $j\notin K$.}
                      \end{array}\right.
     $$
    For example
     $$
       \IC{n}{K} = \left[\begin{array}{lll}
                         1&0&0\\ 0&0&0\\ 0&0&1
                   \end{array}\right]
     $$
    when  $n=3$ and $K=\{1,3\}$.
     \begin{description}
       \item[(1)] Show that $\IC{n}{K}$ is an idempotent.
       \item[(2)] Show that the rank of $\IC{n}{K}$ is the cardinality of $K$.
       \item[(3)] Show that $\IC{n}{K}\IC{n}{H}=\IC{n}{K\cap H}$.
       \item[(4)] Show that $\IC{n}{K}$ and $\IC{n}{H}$ are disjoint
              idempotents iff $H$ and $K$ are disjoint sets,
              that is, $H\cap K=\emptyset$.
       \item[(5)] Prove that
        $
           \IC{n}{K\cup H}+\IC{n}{K\cap H} = \IC{n}{K}+\IC{n}{H}.
        $
     \end{description}

   \end{exercise}



\chapter{Jordan Normal Form}\label{SIM}


In this chapter we will  find a complete
system of invariants that characterize similarity. This means
a collection of nonnegative integers $ \rho_{\lambda,k}(A)$ --
defined for each square matrix $A$, each positive integer $k$,
and each complex number $\lambda$ --
such that for $A,B\in\C^{n\times n}$, we have
that $A$ and $B$ are similar if and only if
 $$
  \rho_{\lambda,k}(A) = \rho_{\lambda,k}(B)\qquad
  \mbox{ for all $\lambda\in\C$ and all $k=1,2,\ldots$}.
 $$
We will prove a normal form theorem for similarity called the
{\em Jordan Normal Form Theorem}.



\section{Similarity Invariants}

\begin{definition}\rm \label{def:eigenrank}
 Let $A\in\C^{n\times n}$, $\lambda\in\C$,
and $k=1,2,3,\ldots$. Define
$$
\rho_{\lambda,k}(A) = \mathrm{rank} \,(\lambda I - A)^k
$$
where $I=I_n$ is the $n\times n$ identity matrix.
The integer $\rho_{\lambda,k}(A)$ is called the $k$th
\jdef{eigenrank} of $A$ for the eigenvalue $\lambda$.
\end{definition}


 \begin{remark}\rm
 If $\lambda$ is not an eigenvalue of $A$,
then $\rho_{\lambda,k}(A)=n$.
 If $k\ge n$,
 $\rho_{\lambda,k}(A)=\rho_{\lambda,n}(A)$.
 (See Exercise~\ref{14.8} below.)
 Thus only finitely many of these numbers are of interest.
  \end{remark}


 \begin{definition}\rm \label{eignul}
The \jdef{eigennullities}
 $\nu_{\lambda,k}(A)$ of the matrix $A$
are defined by
$$
    \nu_{\lambda,k}(A)
      =\mathrm{nullity}((\lambda I-A)^k)
      = \dim\, \NULLSP((\lambda I-A)^k)
$$
From the Rank Nullity Relation~\ref{RNR} ({\em rank + nullity $= n$}),
we obtain
$$
    \rho_{\lambda,k}(A) + \nu_{\lambda,k}(A) = n \eqno{(*)}
$$
  for $A\in\C^{n\times n}$.
Hence, the eigennullities and eigenranks  contain the same information.
\end{definition}


\begin{remark}\rm\label{rmk:eignull}
 The eigennullity
 $$
    \nu_{\lambda,1}(A)=\dim\,\NULLSP(\lambda I - A)=\dim\,\EIG_\lambda(A)
 $$
is called the \jdef{geometric multiplicity} of the eigenvalue $\lambda$.
\index{multiplicity, geometric}
It is the dimension of the eigenspace $\EIG_\lambda(A)$.
The eigennullity
$$
\nu_{\lambda,n}(A)=\dim\,\NULLSP((\lambda I - A)^n) =\dim\,\GEIG_\lambda(A)
$$
 is  called the \jdef{algebraic multiplicity}
\index{multiplicity, algebraic}
 of $\lambda$ for  $A$.
It is the dimension of the generalized eigenspace $\GEIG_\lambda(A)$.
For a diagonalizable matrix these two multiplicities are the same.
 \end{remark}

\begin{theorem}[Invariance] \label{SE}
 Similar matrices have the same eigenranks.
\end{theorem}


\Proof{}
There are three key points:
(1)~ Similar matrices are {\em a fortiori}
equivalent (see Exercise~\ref{exer:rank}),
for if $A=PBP^{-1}$, then $A=QBP^{-1}$
where $Q=P$.
(2)~Similar matrices have similar powers,
for $(PBP^{-1})^k=PB^kP^{-1}$.
(3)~If $A$ and $B$ are similar
so are $\lambda I-A$ and $\lambda I-B$
since $P(\lambda I-B)P^{-1}=\lambda I-PBP^{-1}$.

Now assume that $A$ and $B$ are similar.
Then $A=PBP^{-1}$ where $P$ is invertible.
Choose $\lambda\in\C$.
Then $\lambda I-A=P(\lambda I-B)P^{-1}$. Hence,
$(\lambda I-A)^k=P(\lambda I-B)^kP^{-1}$ for $k=1,2,\ldots\,$.
By Exercise~\ref{exer:rank},
the matrices $(\lambda I-A)^k$ and $(\lambda I-B)^k$ have the same rank.
By the definition of $\rho_{\lambda,k}$,
we have $\rho_{\lambda,k}(A) = \rho_{\lambda,k}(B)$, as required.
\QED

\begin{remark}\rm
Of course, by equation~$(*)$ of Definition~\ref{eignul},
similar matrices have the same eigennullities as well.
% Thus, by Remark~\ref{rmk:eignull},
% Theorem~\ref{SE} generalizes Theorem~\ref{geom-sim}.
Below (Corollary~\ref{SIM-CHAR}), we will prove the converse
to Theorem~\ref{SE}.
\end{remark}




 \begin{exercise}\rm Prove that
  a matrix $A\in\C^{n\times n}$ is diagonalizable
  if and only if
  $\rho_{\lambda,k}(A)=\rho_{\lambda,1}(A)$ for all
  eigenvalues $\lambda$ of $A$ and all $k=1,2,3,\ldots.$
 \end{exercise}

\begin{exercise}\rm \label{14.8}  Prove that
 $\rho_{\lambda,k}(A)=\rho_{\lambda,n}(A)$ if $k\ge n$.
\ifanswer By the block diagonalization Theorem~\ref{M-TBDF},
we may assume that  $A\in\C^{n\times n}$ is in MTBDF:
$$
    A= \diag(\Lambda_1, \Lambda_2, \ldots, \Lambda_m)
$$
where each $\Lambda_j$ has the single eigenvalue $\lambda_j$.
The matrix $\lambda I-\Lambda_j$ is invertible if $\lambda\ne\lambda_j$
and nilpotent if $\lambda=\lambda_j$. Hence,
$$
    (\lambda I - A)^k= \diag(C_1, C_2, \cdots, C_m)
$$
where $C_j=(\lambda I - \Lambda_j)^k$ is invertible for $\lambda\ne\lambda_j$
and zero for $\lambda=\lambda_j$ and $k\ge n_j$ where $\Lambda_j$ has size
$n_j\times n_j$. Thus $\rho_{\lambda,k}(A)=n$ for $\lambda\ne\lambda_j$
and $\rho_{\lambda,k}(A)=n-n_j$ for $\lambda=\lambda_j$ and $k\ge n_j$.
\fi
\end{exercise}

\section{Jordan Normal Form}

We can improve the
Block Diagonalization Theorem~\ref{thm:blk-diag}
considerably by making further similarity transformations within
each block. The resulting blocks will be almost diagonal
except for a few nonzero entries above the diagonal.
Here are the precise definitions.

The entries $\entry_{ii}(A)$ of a matrix $A$
are called the \jdef{diagonal entries},  and said to be
{\em on the diagonal}. The entries $\entry_{i,i+1}(A)$
are called the
\jdef{superdiagonal} entries, and said to lie
{\em on the superdiagonal}.
The superdiagonal entries lie just above the diagonal.
A \jdef{Jordan block}
 is a square matrix $\Lambda$ having
 all its diagonal entries equal,
 zeros or ones on the superdiagonal, and
 zeros elsewhere.
 Thus $\Lambda$ is a Jordan block iff
\begin{eqnarray*}
 \entry_{ii}(\Lambda)    &=& \mbox{$\lambda$,} \\
 \entry_{i,i+1}(\Lambda) &=& \mbox{$0$ or $1$,}\\
  \entry_{ij}(\Lambda)   &=& \mbox{$0$ if $j\ne i,i+1$.}
\end{eqnarray*}


\begin{definition}[Jordan Normal Form] % Normal Form Definition
A matrix $J$ is in \jdef{Jordan normal form} %,abbreviated \jdef{JNF},
iff it is in  block diagonal form
$$
   J= \diag(\Lambda_1,\Lambda_2,\ldots, \Lambda_m )
$$
where each $\Lambda_k$ is a Jordan block.
\end{definition}

\begin{example}\rm The $6\times 6$ matrix
$$
   J=
    \Mat{llllll}
     \lambda_1 & e_1   &  0    &      &      & \\
     0     & \lambda_1 & e_2   &      &      & \\
     0     & 0     & \lambda_1 &      &      & \\
           &       &      & \lambda_2 &      &  \\
           &       &      &       & \lambda_3 & e_3\\
           &       &      &       & 0     & \lambda_3
    \Rix
$$
is in Jordan normal form provided that each of the
superdiagonal entries $e_1,e_2,e_3$ is either zero or one.
\end{example}

 \begin{theorem}[Jordan Normal Form]\label{JNF-thm}
Every square matrix $A$ is similar to a matrix $J$
in Jordan normal form.
 \end{theorem}


In other words, any square matrix $A$ may be written in the form
$$
     A = PJP^{-1}
$$
where $P$ is invertible and $J$ is in Jordan normal form.
By the
Block Diagonalization Theorem~\ref{thm:blk-diag},
we can  assume that the matrix $A$ is
block diagonal.
We can work a block at a time, so it is enough
to prove the theorem for  matrices with only one eigenvalue.
As the matrices $\lambda I+V_1$ and $\lambda I+V_2$ are similar
if and only if the matrices $V_1$ and $V_2$ are, it is enough
to prove the theorem for nilpotent
(in fact, strictly upper triangular) matrices.
The proof will occupy most of the rest of this chapter.


\section{Indecomposable Jordan Blocks}
In  this section we'll prove a special case
of the Jordan Normal Form Theorem~\ref{JNF-thm}
as a warmup.
The ideas in the general case are similar.
We'll make a preliminary definition.

An \jdef{indecomposable Jordan block} is one where
all the entries on the superdiagonal are one.
It has the form $\lambda I+W$ where
$$
     \entry_{ij}(W) = \left\{\begin{array}{ll}
                1 & \mbox{ if $j=i+1$,}\\
                0 & \mbox{ otherwise.}
             \end{array}\right.
$$
Notice that $W$ is itself an indecomposable Jordan block
(with eigenvalue zero).   A Jordan block has form
$$
 \Lambda=  \diag(\lambda I+W_1,\; \lambda I+W_2,\;\ldots ,\;\lambda I+W_k)
$$
  where the matrices $\lambda I+W_1, \lambda I+W_2,\ldots,\lambda I+W_k$
  are indecomposable Jordan blocks.\footnote{The terminology
   here is at slight variance with the general usage.
   Most authors call {\em Jordan block} what we
   have called {\em indecomposable Jordan block}.
}
For example, the Jordan block
$$
   \Lambda=
    \Mat{llllll}
     \lambda & 1   & 0   &     &      & \\
     0   & \lambda & 1   &     &      & \\
     0   & 0   & \lambda &     &      & \\
         &     &     & \lambda &      &  \\
         &     &     &     & \lambda & 1\\
         &     &     &     & 0   & \lambda
    \Rix
$$
has form
 $$
    \Lambda =  \diag(\lambda I+W_1,\;\lambda I+W_2,\;\lambda I+W_3)
  $$
where the constituent indecomposable Jordan blocks are
$$
   \lambda I+W_1=
    \Mat{lll}
     \lambda & 1   & 0    \\
     0   & \lambda & 1    \\
     0   & 0   & \lambda
    \Rix,\;\;
   \lambda I+W_2=
    \Mat{l}
         \lambda
    \Rix,\;\;
   \lambda I+W_3=
    \Mat{ll}
          \lambda & 1\\
          0   & \lambda
    \Rix.
$$

 \begin{question}\rm
  What are the eigenranks of this last matrix $\Lambda$?
 (Answer: $\rho_{\mu,k}(\Lambda)=6$ for $\mu\ne\lambda$,
 $\rho_{\lambda,1}(\Lambda)=3$,
 $\rho_{\lambda,2}(\Lambda)=1$, and
 $\rho_{\lambda,k}(\Lambda)=0$ for $k>2$.)
 \end{question}

 \begin{theorem}
Let $N\in\F^{n\times n}$ be a matrix of size $n\times n$ and
degree of nilpotence $n$, i.e.
that $N^n=0$ but $N^{n-1}\ne 0$.
Then $N$ is similar to the indecomposable
$n\times n$ Jordan block $W$.
\end{theorem}

\Proof{} Since $N^n=0$ but $N^{n-1}\ne 0$,
there is a vector $X\in\F^{n\times 1}$
such that $N^n X=0$ but $N^{n-1}X\ne 0$.
Form the matrix $P$ whose $j$th column is $N^{n-j}X$.
We will prove that
$$
    NP=PW.
$$
Then we will show that $P$ is invertible.
Multiplying on the right by $P^{-1}$ gives
$$
   N=PWP^{-1}.
$$


{\em We prove that $NP=PW$, i.e. that
$$
   \col_j(NP)=\col_j(PW)
$$
for $j=1,2,\ldots, n$.} By the definition of $P$,
 $$
   P=\Mat{llcll}
       N^{n-1}X &N^{n-2}X & \cdots & NX & X
      \Rix,
 $$
so
 $$
   NP=\Mat{llcll}
       0 &N^{n-1}X & \cdots & N^2X & NX
      \Rix,
 $$
so
 \begin{eqnarray*}
 \col_1(NP)&=&0,\\
 \col_j(NP)&=&\col_{j-1}(P)\mbox{ for $j=2,3,\ldots,n$.}
 \end{eqnarray*}
On the other hand,
the first column of $W$ is zero, and
the $j$th column of $W$ is
the $(j-1)$st column of the identity matrix.
Thus
 \begin{eqnarray*}
 \col_1(PW)&=&0,\\
 \col_j(PW)&=&\col_{j-1}(P)\mbox{ for $j=2,3,\ldots,n$.}
 \end{eqnarray*}
This proves that $NP=PW$.

{\em We prove that $P$ is invertible.} It is enough
to show that its columns are  independent.
 Suppose
 $$
   0=c_1N^{n-1}X +c_2N^{n-2}X+\cdots + c_{n-1} NX +c_nX.
 $$
Since $N^k=0$ for $k\ge n$ we may apply $N^{n-1}$ to
both sides and obtain that $c_nN^{n-1}X=0$.
But $N^{n-1}X\ne 0$ so $c_n=0$.
Now apply $N^{n-2}$ to both sides to prove that $c_{n-1}=0$.
Repeating in this way
we obtain that $c_1=c_2=\cdots=c_n=0$, as required.
\QED




\section{Partitions}
A little terminology from number theory is useful in describing
the relations among the various eigennullities of a nilpotent
matrix.

A \jdef{partition} of a positive integer $n$ is a nonincreasing
sequence $\pi$ of positive integers which sum to $n$, that is,
 $$
   \pi = (n_1,n_2,\ldots,n_m)
 $$
where
 $$
     n_1\geq n_2\geq \cdots\geq n_m \geq 1
 $$
and
 $$
      n_1+n_2+\cdots+n_m = n.
 $$

A partition $\pi=(n_1,n_2,\ldots,n_m)$ can be used to construct
a diagram of stars called a \jdef{tableau}.
The tableau consists of
$n=n_1+n_2+\cdots+n_m$ stars arranged in $m$ rows with the $k$th row having $n_k$ stars.
The stars in a row are left justified so that
the $j$th columns align.
The $j$th column of the tableau
intersects the $k$th row exactly when $j\le n_k$.
The \jdef{dual partition} $\pi^*$ of $\pi$
is obtained by forming the transpose of this tableau.
Thus $\pi^*=(\ell_1,\ell_2,\ldots,\ell_p)$ where
$\ell_j$ is the number of indices $k$ with $j\le n_k$.
For example, if
$$
\pi=(5,5,4,3,3,3,1),
$$
 then the tableau is
 $$
   \begin{array}{ccccc}
      \star & \star & \star & \star & \star \\
      \star & \star & \star & \star & \star \\
      \star & \star & \star & \star &       \\
      \star & \star & \star &&       \\
      \star & \star & \star &&       \\
      \star & \star & \star &&       \\
      \star &&&&
   \end{array}
 $$
and the dual partition
 $$
   \pi^* = (7,6,6,3,2)
 $$
is obtained by counting the number of entries in successive columns.
The dual of the dual is the original partition:
 $$
       \pi^{**}=\pi.
$$


\section{Weyr Characteristic}
Let $N\in\F^{n\times n}$ be a nilpotent matrix, and
let $p$ be the \jdef{degree of nilpotence} of $N$.
This is the least integer for which $N^p=0$:
 $$
   N^p=0,\qquad N^{p-1}\ne 0.
$$
Recall that the $k$th \jdef{eigenrank} of $N$ is the integer
 $$
      \rho_k(N) = \mathrm{rank}(N^k) = \dim\,\Range(N^k).
$$
We have dropped the subscript $\lambda$
since $N$ is nilpotent: its only eigenvalue is zero.
The sequence of integers $\omega=(\ell_1,\ell_2,\ldots,\ell_p)$ defined by
$$
  \ell_k = \rho_{k-1}(N) - \rho_k(N)
  $$
for $k=1,2,\ldots,p$ is called the
\jdef{Weyr characteristic} of the nilpotent matrix $N$.

\begin{theorem} The Weyr characteristic of
a nilpotent matrix $N\in\F^{n\times n}$ is a
partition of $n$.
\end{theorem}

\Proof{} Successive terms $\ell_k$ and $\ell_{k+1}$
in the sum $\ell_1+\cdots+\ell_p$  contain $\rho_k(N)$
with opposite signs. Hence, the sum ``telescopes'':
$$
\ell_1+\ell_2+\cdots+\ell_p= \rho_0(N)-\rho_p(N) = n-0 = n
$$
as $N^0=I$  and $N^p=0$. To show that $\ell_k\ge \ell_{k+1}$,
first note the obvious inclusion of ranges
$$
 \Range(N^k)\subseteq\Range(N^{k-1}).
$$
This holds because  $N^kX=N^{k-1}(NX)$.
Let $\Phi$ be a frame for the subspace
$\Range(N^k)$,  and extend it to a frame $\Psi$ for
$\Range(N^{k-1})$ by adjoining additional columns $\Upsilon$:
$$
\Psi=\Mat{rr}\Phi & \Upsilon \Rix.
$$
Then $\Psi$ has $\rho_{k-1}(N)$ columns,
     $\Phi$ has $\rho_k(N)$ columns, and
     $\Upsilon$ has $\ell_k$ columns.
Now
$$
\Range(N^{k-1})=\Range(\Psi), \qquad \Range(N^k)=\Range(\Phi),
$$
so
$$
\Range(N^k)=\Range(N\Psi),\qquad \Range(N^{k+1})=\Range(N\Phi).
$$
Discard some columns from $N\Phi$ to make a basis
$\tilde{\Phi}$ for $\Range(N^{k+1})$, and then discard some
columns from
$$
  N\Psi=\Mat{rr}N\Phi & N\Upsilon \Rix,
$$
so that
$$
\Tilde{\Psi}=\Mat{rr}\Tilde{\Phi} & \Tilde{\Upsilon} \Rix
$$
is a basis for $\Range(N^k)$. Then $\Tilde{\Upsilon}$ has $\ell_{k+1}$ columns.
 Since the columns of $\Tilde{\Upsilon}$ were selected from the $\ell_k$ columns of $N\Upsilon$,
 it follows that $\ell_{k+1}\le\ell_k$,  as required. \QED

\section{Segre Characteristic}
For each $k$ let $W_k\in\F^{k\times k}$ denote
the $k\times k$ indecomposable Jordan block with
eigenvalue zero:
 \begin{eqnarray*}
   \entry_{ij}(W_k)    &=& 0\;\; \mbox{ if $j\ne i+1$}\\
   \entry_{i,i+1}(W_k) &=& 1 \;\; \mbox{ for $i=1,2,\ldots,k-1$.}
 \end{eqnarray*}
The subscript  $k$  indicates the size of the matrix $W_k$.
For each partition
$$
\pi=(n_1,n_2,\ldots,n_m)
$$
denote by $W_\pi$ the
Jordan block  given by  the block diagonal matrix
 $$
     W_\pi = \diag(W_{n_1},W_{n_2},\ldots, W_{n_m}).
 $$
A matrix of form $W_\pi$ is called a  \jdef{Segre matrix}.


A Segre matrix is in Jordan normal form.
Conversely,
any nilpotent matrix in Jordan normal form can be  transformed
to a Segre matrix by permuting the blocks so that their sizes
decrease along the diagonal. (This can be accomplished by replacing
$J$ by $PJP^{-1}$ for a suitable permutation matrix $P$.)

Now define the \jdef{Segre characteristic} of a nilpotent
matrix to be the dual partition
$$
  \pi = \omega^*
$$
of the Weyr characteristic $\omega$. The key to understanding the Jordan
Normal Form Theorem is the following

\begin{theorem} \label{Seg-thm}
The Segre characteristic of the
Segre matrix $W_\pi$  is $\pi$.
\end{theorem}



An example is more convincing than a general proof.
Let
$$
\pi=(3,2,2,1),\qquad \omega=\pi^*=(4,3,1),
$$
 so
$$
W=W_\pi = \diag(W_3, W_2, W_2,W_1).
$$
 Written in full this is
$$
W=\Mat{rrrrrrrr}
   0 & 1 & 0 &   &   &   &   &   \\
   0 & 0 & 1 &   &   &   &   &   \\
   0 & 0 & 0 &   &   &   &   &   \\
     &   &   & 0 & 1 &   &   &   \\
     &   &   & 0 & 0 &   &   &   \\
     &   &   &   &   & 0 & 1 &   \\
     &   &   &   &   & 0 & 0 &   \\
     &   &   &   &   &   &   & 0
\Rix.
$$
(The blank entries represent $0$; they have been omitted
to make the block structure more evident.)
In the notation of the definition
$$
\pi=(n_1,n_2,n_3,n_4),\qquad \omega=(\ell_1,\ell_2,\ell_3),
$$
where $n_1=3$, $n_2=n_3=2$, $n_4=1$, $\ell_1=4$, $\ell_2=3$, $\ell_3=1$
and
$$
n=n_1+n_2+n_3+n_4=\ell_1+\ell_2+\ell_3=8.
$$
For $j=1,2,\ldots,8$
let $E_j=\col_j(I_8)$ denote the $j$th column of the $8\times 8$
identity matrix so that
$$
WE_j = \left\{\begin{array}{ll}
         0       & \mbox{ for $j=1,4,6,8$;}\\
         E_{j-1} & \mbox{ for $j=2,3,5,7$.}
  \end{array}\right.
$$
Arrange these columns in a tableau
$$
\begin{array}{rrr}
    E_1 & E_2 & E_3 \\
    E_4 & E_5 & \\
    E_6 & E_7 & \\
    E_8 &     &
\end{array}
$$
so that
 $n_i$ is the number of entries in the $i$th row
of the tableau  and  $\ell_j$ is the number of entries in
the $j$th column. We can decorate the tableau with
arrows to indicate the effect of applying $W$:
$$
\begin{array}{lclclcl}
 0  &\leftarrow& E_1  &\leftarrow& E_2  &\leftarrow& E_3 \\
 0  &\leftarrow& E_4  &\leftarrow& E_5  &&               \\
 0  &\leftarrow& E_6  &\leftarrow& E_7  &&               \\
 0  &\leftarrow& E_8  &&                &&
\end{array}
$$
We now see a general principle:
\begin{quote}\em
 Applying $W^k$ to the tableau annihilates the  elements in the first
$k$ columns and transforms the remaining elements into the columns
of a basis for $\Range(W^k)$.
\end{quote}
Thus the $k$th eigenrank is the number
$$
\rho_k(W)= \ell_{k+1}+\ell_{k+2}+\cdots+\ell_p
$$
of elements to the right of the $k$th column.
This equation says precisely that $\omega=(\ell_1,\ell_2,\ldots,\ell_p)$
is the Weyr characteristic of $W=W_\pi$, as required.


\section{Jordan-Segre Basis}
Continue the notation of the last section. Let $\pi$ be a partition of $n$
and $W_\pi$ be the corresponding Segre matrix.
 For $j=1,2,\cdots,n$ let
$$
  E_j=\col_j(I_n)
$$
denote the $j$th column of the identity matrix $I_n$.
Then $W_\pi E_j$ is either $E_{j-1}$ or $0$ depending on $\pi$.
We'll use a double
subscript notation to specify for which values of $j$
the former alternative holds. Let
$$
E_{1,1}, \ldots, E_{1,n_1},E_{2,1},\ldots,E_{2,n_2},\ldots
$$
denote the columns $E_1,E_2,\ldots,E_n$ in that order.
Then
$$
\begin{array}{ll}
     W_\pi E_{i,1}  =  0         & \mbox{ for $i=1,2,\ldots,m$,}\\
\\
     W_\pi E_{i,j}  =  E_{i,j-1} & \mbox{ for $j=2,3,\ldots,n_i$.}
\end{array}
$$
These relations say that the doubly indexed sequence $E_{ij}$
forms a {\em Jordan-Segre Basis} for $(\F^{n\times 1},W_\pi)$.
Here's the definition.

Let $N\in\F^{n\times n}$ be a matrix and
$\VV\subseteq\F^{n\times 1}$ be a subspace.
A  \jdef{Jordan-Segre Basis} for $(\VV,N)$
is a doubly indexed sequence
$$
  X_{i,j}\in\VV,\qquad (i=1,2,\ldots,m;\;\; j=1,2,\ldots, n_i)
$$
of columns  which forms a basis for $\VV$ and satisfies
$$
\begin{array}{ll}
     N X_{i,1}  =  0         & \mbox{ for $i=1,2,\ldots,m$,}\\
\\
     N X_{i,j}  = X_{i,j-1}  & \mbox{ for $j=2,3,\ldots,n_i$.}
\end{array}
$$
The sequence $\pi=(n_1,n_2,\ldots,n_m)$ is called
the \jdef{associated partition} of the basis; it is
a partition of the dimension of $\VV$:
$$
\dim(\VV) = n_1+n_2+\cdots+n_m.
$$
The condition that the elements $X_{i,j}\in\VV$
form a basis for $\VV$ means  that every $X\in\VV$
may be written uniquely as a linear combination
of these $X_{i,j}$, in other words, that the
inhomogeneous system
$$
 X= \sum_{i=1}^m\sum_{j=1}^{n_i} c_{ij} X_{ij}
$$
(in which the $c_{i,j}$ are the unknowns) has a
unique solution. Throughout most of these notes
we would have said instead that
the matrix formed from these columns is a {\em basis}
for $\VV$, but the present terminology is more conventional.
The matrix  whose columns are
$$
X_{1,1}, \ldots, X_{1,n_1},X_{2,1},\ldots,X_{2,n_2},\ldots, X_{m,n_m}
$$
(in that order)  is called the \jdef{basis corresponding}
to the Jordan-Segre basis.   In case $\VV=\F^{n\times 1}$, this
is an invertible matrix.


\begin{theorem}\label{J/S-b}
Suppose that $P\in\F^{n\times n}$ is
the basis (matrix)
corresponding to a Jordan-Segre basis
for  $(\F^{n\times 1},N)$. Then
$$
N = PW_\pi P^{-1}
$$
where $\pi$ is the associated partition.
\end{theorem}

\Proof{} Since $P$ is invertible, the conclusion may be written
as $NP= P W_\pi$.
Let $E_{i,j}$ be the $k$th column of the identity matrix
where $X_{i,j}$ is the $k$th column of $P$.
Then
$$
 \col_k(NP) = N\col_k(P) = N X_{i,j}=
\left\{\begin{array}{rr}
    X_{i,j-1}=\col_{k-1}(P) & \mbox{ if $j>1$,} \\
     0                      & \mbox{ if $j=1$,}
 \end{array}\right.
$$
while
$$
  \col_k(PW_\pi)=P\col_k(W_\pi)=
\left\{\begin{array}{rrr}
   PE_{i,j-1}=\col_{k-1}(P) & \mbox{ if $j>1$,} \\
     0                      & \mbox{ if $j=1$,}
 \end{array}\right.
$$
so
$$
\col_k(NP)=\col_k(PW_\pi).
$$
 As $k$ is arbitrary this shows that
$$
NP=PW_\pi,
$$
 as required. \QED

\section{Improved Rank Nullity Relation}
The Rank Nullity Relation~\ref{RNR}
says that for $A\in\F^{m\times n}$ we have
$$
\mathrm{rank}(A)+\mathrm{nullity}(A)= n.
$$
For the proof of the Jordan Normal Form Theorem, we'll need a
slight generalization.


\begin{lemma}
Suppose
that  $\VV\subseteq\F^{n\times 1}$ is a subspace and that
$A\in\F^{m\times n}$.   Then
$$
\dim(A\VV)+\dim\bigl(\VV\cap\NULLSP(A)\bigr) = \dim(\VV)
$$
where
$$
A\VV =\{ AX\in\F^{m\times 1}: X\in\VV\}
$$
and
$$
\VV\cap\NULLSP(A)=\{ X\in\VV : AX=0\}.
$$
\end{lemma}

\Proof{} Exercise.

\section{Proof of the Jordan Normal Form Theorem}

To prove the Jordan Normal Form Theorem~\ref{JNF-thm},
it is enough to  prove it for nilpotent matrices.
For this,
by Theorem~\ref{J/S-b},
it is enough to prove that
if $N$ is a nilpotent matrix,
there is a Jordan-Segre basis for $(\F^{n\times 1},N)$.
We shall prove this inductively.

Let $N\in\F^{n\times n}$ be nilpotent, and
let $p$ be the degree of nilpotence of $N$.
This means that
$$
N^p=0,\qquad N^{p-1}\ne 0.
$$
Let $\VV_k$ denote
the range $\Range(N^k)$ of $N^k$:
$$
  \VV_k = N^k\F^{n\times 1} = N\VV_{k-1}.
$$
Clearly,  $\VV_k\subseteq\VV_{k-1}$.
(Proof: Choose $X\in\VV_k$.
Then $X=N^kY$ for some $Y$,
so $X=N^{k-1}Z$ where $Z=NY$,
so $X\in\VV_{k-1}$.)
Hence, we have an increasing sequence
$$
 \{0\}=\VV_p\subseteq\VV_{p-1}
\subseteq\cdots\subseteq
\VV_1\subseteq\VV_0=\F^{n\times 1}
$$
of subspaces of $\F^{n\times 1}$. The theorem follows
by taking $k=0$ in the following

\begin{lemma} \label{is-J/S}
There is a Jordan-Segre basis for $(\VV_k,N)$.
\end{lemma}

\Proof{} This is proved by reverse induction on $k$.
This means that first we prove it for $k=p$, then for
$k=p-1$, then for $k=p-2$, and so on. At the $(p-k)$th stage
of the proof, we use the basis constructed for $\VV_{k+1}$
to construct a basis for $\VV_k$.

For $k=p$, the basis is empty, as $\VV_k=\{0\}$.
For $k=p-1$, any basis for $\VV_{p-1}$ is a Jordan-Segre
basis, since $NX=0$ for $X\in\VV_{p-1}$.
Now assume that we have constructed a Jordan-Segre basis
$$
\begin{array}{rrrrrr}
   X_{1,1} & X_{1,2} & \ldots & \ldots     & \ldots    & X_{1,m_1} \\
           &         & \vdots &            &           &   \\
   X_{i,1} & X_{i,2} & \ldots & \ldots     & X_{i,m_i} &  \\
           &         & \vdots &            &           &   \\
   X_{h,1} & X_{h,2} & \ldots & X_{h,m_h}  &           &
 \end{array}
$$
for $(\VV_{k+1},N)$. We shall extend it to a Jordan-Segre basis
for $(\VV_k,N)$ by adjoining an additional element to the
end of every row and (possibly) some additional elements
at the bottom of the first column.

As the elements of the basis lie in $\VV_{k+1}=N\VV_k$,
each has the form $NX$ for some $X\in\VV_k$. In particular, this
is true for these elements on the right edge of the tableau, so there
are elements $X_{i,m_i+1}\in\VV_k$ satisfying
$$
X_{i,m_i}=NX_{i,m_i+1}.
$$
We adjoin this element $X_{i,m_i+1}$ to the right end of the $i$th
row. The elements in the first column form a basis for
$\VV_{k+1}\cap\NULLSP(N)$. As $\VV_{k+1}\subseteq\VV_k$, these
elements form an independent sequence in $\VV_k\cap\NULLSP(N)$.
Hence, we may extend to a basis
$$
X_{1,1},X_{2,1},\ldots,X_{h,1},X_{h+1,1},\ldots,X_{g,1}
$$
for $\VV_k\cap\NULLSP(N)$.

We claim that this is a Jordan-Segre basis for $(\VV_k,N)$.
The elements $NX_{i,j}$ with $j>1$ are precisely the elements
of the Jordan-Segre basis for $\VV_{k+1}=N\VV_k$, while
the elements $X_{i,1}$ form a basis for $\VV_k\cap\NULLSP(N)$
by construction. Thus by the improved Rank Nullity Relation of the
previous section (applied with $A=N$ and $\VV=\VV_k$), the
elements  $X_{i,j}$  ($i=1,2,\ldots,g$, $j\geq 1$) form a basis
for $\VV_k$, as required.
This completes the proof of the lemma
and hence of the Jordan Normal Form Theorem~\ref{JNF-thm}.\QED


\begin{example}\label{3221} Suppose that the Segre characteristic
of the nilpotent matrix $N$
is the partition $\pi=(3,2,2,1)$ of the example
in the proof of Theorem~\ref{Seg-thm}. We follow
the steps in the proof of Lemma~\ref{is-J/S} to construct
a Jordan-Segre basis. Note that $N^3=0$.
\begin{itemize}
\item
 Let $X_1$ be a basis for $\Range(N^2)$.
\item
Extend to a basis $\Mat{cccc} X_1&X_2&X_4& X_6\Rix$ for
     $\Range(N)$ by
solving the inhomogeneous system  $NX_2=X_1$ for $X_2$
and
extending $\Mat{c} X_1\Rix$ to a basis
$\Mat{cccc} X_1&X_4& X_6\Rix$ of $\Range(N)\cap\NULLSP(N)$.
\item Extend to a basis
$$
   P = \Mat{cccccccc} X_1 &X_2 &X_3 &X_4 &X_5 &X_6 &X_7 &X_8 \Rix
$$
of $\F^{8\times 1}$ by
solving the inhomogeneous systems
$$
   NX_3=X_2, \qquad NX_5=X_4, \qquad NX_7=X_6,
$$
for $X_3$, $X_5$, and $X_7$,
and then extending
$\Mat{cccc} X_1&X_4& X_6\Rix$ to a basis
$\Mat{cccc} X_1&X_4& X_6& X_8\Rix$ for $\NULLSP(N)$.
\end{itemize}
\end{example}


\bigskip

 \begin{theorem}\label{seesw}
For two nilpotent matrices of the same size,
the following conditions are equivalent:
\begin{description}
\item[(1)] they are similar;
\item[(2)] they have the same eigenranks;
\item[(3)] they have the same eigennullities;
\item[(4)] they have the same Segre characteristic;
\item[(5)] they have the same Weyr characteristic.
\end{description}
 \end{theorem}



\Proof{}
 The eigennullities and the Weyr characteristic
are related by the two equations
\begin{eqnarray*}
  \nu_k(N) &=& \ell_1+\ell_2+\cdots+\ell_k, \\
  \ell_k   &=& \nu_k(N) - \nu_{k-1}(N),
\end{eqnarray*}
and so they
determine one another. By the Rank Nullity Relation~\ref{RNR},
 $$
    \nu_k(N)+\rho_k(N)=n,
  $$
so the Weyr characteristic and the eigenranks determine one another.
By duality,
the Weyr characteristic and the Segre characteristic determine one
another.
This shows that conditions~(2) through~(5) are equivalent.
We have seen that (1)$\implies$(2) in Theorem~\ref{SE}.
We have proved that every nilpotent matrix is
similar to some Segre matrix $W_\pi$ (Theorem~\ref{J/S-b} and Lemma~\ref{is-J/S}),
and that the Segre characteristic of $W_\pi$ is $\pi$ (Theorem~\ref{Seg-thm}).
Hence, (4)$\implies$(1). \QED


 \begin{corollary}\label{SIM-CHAR}
\index{similarity, characterization of}
The eigenranks
$$
\rho_{\lambda,k}(A)=\mathrm{rank}\, (\lambda I- A)^k
$$
form a complete system of invariants for similarity.
\index{complete invariant, for similarity}
This means that two square matrices $A,B\in\F^{n\times n}$
are similar if and only if
 $$
  \rho_{\lambda,k}(A) = \rho_{\lambda,k}(B)\qquad
 $$
 for all $\lambda\in\C$ and all $k=1,2,\ldots$.
\end{corollary}

\Proof{} We have already proved ``only if'' as Theorem~\ref{SE}.
In the nilpotent case, ``if'' is Theorem~\ref{seesw}, just proved.
The general case follows from the nilpotent case as indicated
in the discussion just after the statement of Theorem~\ref{JNF-thm}. \QED

\iffalse
\begin{remark}\rm
According to  Exercise~\ref{RSiM},
the similarity can be chosen real when $A$ and $B$ are real.
\end{remark}
\fi

\section{Exercises} % Jordan Normal Form

\begin{exercise}\rm  Calculate the eigenranks
$\rho_{\lambda,k}(A)$ where
$$
    A=\Mat{rrrrrr}
        5 & 1 & 0 &  &  &  \\
        0 & 5 & 1 &  &  &  \\
        0 & 0 & 5 &  &  &  \\
          &   &   & 7 & 0 & 0 \\
          &   &   & 0 & 7 & 1 \\
          &   &   & 0 & 0 & 7
     \Rix.
$$
\ifanswer
$\rho_{5,1}(A)=5$,
$\rho_{5,2}(A)=4$,
$\rho_{5,k}(A)=3$ for $k\ge 3$,
$\rho_{7,1}(A)=4$,
$\rho_{7,k}(A)=3$ for $k\ge 2$,
and $\rho_{\lambda,k}(A)=6$ for $\lambda\ne 5,7$.
\fi
\end{exercise}


\begin{exercise}\rm A $24\times 24$ matrix $N$ satisfies
$N^5=0$ and
$$
 \mathrm{rank}(N^4)=2,\quad \mathrm{rank}(N^3)=5, \quad \mathrm{rank}(N^2)=11,\quad \mathrm{rank}(N)=17.
$$
Find its Segre characteristic.
\ifanswer $\pi=(5,5,4,3,3,3,1)$.
\fi
\end{exercise}

 \begin{exercise}\rm
 For a fixed eigenvalue $\lambda$ there are
$8$  matrices in Jordan normal form of size $4\times 4$ having
$\lambda$ as the only eigenvalue.
(Each of the three entries on the superdiagonal
can be either $0$ or $1$.) Which of these are similar?
Hint: Compute the invariants $\rho_{\lambda,k}$.
 \end{exercise}

\begin{exercise}\rm Show that a matrix and its transpose are similar.
\end{exercise}


\begin{exercise}\rm Suppose that $N$ is nilpotent,
that $W$ is invertible, and that $WN=NW$.
Show that $N$ and  $NW$ are similar.
\end{exercise}


 \begin{exercise}\rm
  Prove that if $N$ is nilpotent, then $I+N$ and $e^N$
are similar.
 \end{exercise}


\begin{exercise}[Chevalley Decomposition]
 \label{Chev} \sloppy \hyphenation{unique-ly}
Show that
a square matrix $A\in\F^{n\times n}$ may be written
uniquely in the form
$$
    A=S+N
$$
where
$S$ is diagonalizable,
$N$ is nilpotent, and
$S$ and $N$ commute.
Moreover,
if $A$ is real,
then so are $S$ and $N$ (although $S$ might
have nonreal eigenvalues and thus
not be diagonalizable over $\R$).
Hint: In the complex case we may assume that
$A$ is in Jordan Normal Form. Then $S$
is diagonal and $N$ is strictly triangular.
Find polynomials $f$ and $g$ such that
$S=f(A)$ and $N=g(A)$.
\end{exercise}

\chapter{Groups and Normal Forms}

\section{Matrix Groups}

\begin{definition}\rm % matrix group
A \jdef{matrix group} is a set
$$
G\subseteq \F^{n\times n}
$$
of invertible matrices such that
 \begin{itemize} % trinity - matrix group defined
\item $G$ contains the identity matrix: $I_n\in G$.
\item $G$ is closed under taking inverses:  $P\in G\implies P^{-1}\in G$.
\item $G$ is closed under multiplication:  $P,Q\in G\implies PQ\in G$.
 \end{itemize}
\end{definition}


\begin{theorem}\label{ex:GLn}
The set   of all invertible matrices
in $\F^{n\times n}$  is a matrix group.
(It is called the \jdef{general linear group}.)
\end{theorem}


\begin{theorem}\label{ex:SLn}
The set   of all matrices
in $\F^{n\times n}$ of determinant one  is a matrix group.
(It is called the \jdef{special linear group}.)
\end{theorem}

\begin{definition}\rm
A matrix $P$ is called \jdef{unitary} iff its
conjugate transpose is its inverse:
$$
     P\ctr=P^{-1}.
$$
\end{definition}

\begin{theorem}\label{ex:unitary}
The  set   of {\em all} unitary matrices
in $\F^{n\times n}$ is a matrix group.
(It is called the \jdef{unitary group}.)
\end{theorem}



\begin{definition}\rm
A matrix $P$ is called \jdef{orthogonal} iff its
transpose is its inverse:
$$
     P\tr=P^{-1}.
$$
(Thus a real matrix is unitary if and only if it is orthogonal.)
\end{definition}

\begin{theorem}
The set  of all orthogonal matrices in $\F^{n\times n}$
is a matrix group.
(It is called the \jdef{orthogonal group}.)
\end{theorem}

\begin{theorem} The set of all invertible diagonal matrices
in $\F^{n\times n}$ is a matrix group.
\end{theorem}

 \begin{theorem}\Amark\ \label{tri-group}
The set of all invertible triangular (see~\ref{subsec:tri}) matrices
in $\F^{n\times n}$  is a matrix group.
\ifanswer Using \begin{description}
   \item[(1)] $I_n(\FLAG{n}{k})=\FLAG{n}{k}$.
   \item[(2)] If $P(\FLAG{n}{k})=\FLAG{n}{k}$, then
                 $\FLAG{n}{k}=P^{-1}(\FLAG{n}{k})$.
   \item[(3)] If $P(\FLAG{n}{k})=\FLAG{n}{k}$ and
                 $Q(\FLAG{n}{k})=\FLAG{n}{k}$, then
                 $PQ(\FLAG{n}{k})=\FLAG{n}{k}$.
    \end{description}
\fi
 \end{theorem}


 \begin{theorem}\label{utri-group}
   The set of all uni-triangular (see~\ref{ex:uni-triangular}) matrices
in $\F^{n\times n}$ is a matrix group.
 \end{theorem}

\begin{definition}\rm
A matrix is called \jdef{lower triangular} iff its
transpose is triangular.
\end{definition}


\begin{theorem}
The set  of all
invertible lower triangular matrices
in $\F^{n\times n}$  is  a matrix group.
\end{theorem}

\section{Matrix Invariants}

Each of the  theorems in this section has the form
%
\begin{quote}\em
Two matrices of the same size are ``equivalent''
if and only if they have the same ``invariant''.
\end{quote}
%
The equivalence relations involve  the matrix groups
of the previous section. Some of these theorems
have been proved in the text or can easily
be deduced from theorems in the text and elementary
matrix algebra. Theorems~\ref{the:ule}, \ref{the:usim},
and~\ref{the:uequiv} use material
not explained in these notes.

\begin{definition}\rm
Two matrices
$A,B\in\F^{m\times n}$
are called \jdef{equivalent} iff
there exists an invertible matrix $Q\in\F^{m\times m}$
and an invertible matrix $P\in\F^{n\times n}$ such that
$$
A=QBP^{-1}.
$$
\end{definition}

\begin{theorem} Two matrices of the same size are
equivalent if and only if they have the same rank.
\end{theorem}


\begin{definition}\rm
Two matrices $A,B\in\F^{m\times n}$
are called \jdef{left equivalent} iff
there is an invertible matrix $Q\in\F^{m\times m}$  such that
$$
A=QB.
$$
\end{definition}

\begin{theorem} Two matrices of the same size are
left equivalent if and only if they have the same null space.
\end{theorem}


\begin{definition}\rm
Two matrices $A,B\in\F^{m\times n}$
are called \jdef{right equivalent} iff
there is an invertible matrix $P\in\F^{n\times n}$  such that
$$
A=BP^{-1}.
$$
\end{definition}

\begin{theorem} Two matrices of the same size are
right equivalent if and only if they have the same range.
\end{theorem}



\begin{definition}\rm
For any matrix $A$ the rank  $\delta_{pq}(A)$ of the $p\times q$
submatrix in the upper left hand corner of $A$ is called
the $(p,q)$th \jdef{corner rank} of $A$.
Two matrices $A,B\in\F^{m\times n}$
are called \jdef{lower upper equivalent} iff
there exists an invertible lower triangular matrix $Q\in\F^{m\times m}$
and a uni-triangular  matrix
$P\in\F^{n\times n}$ such that
$$
A=QBP^{-1}.
$$
\end{definition}

\begin{theorem}
Two matrices of the same size  are lower upper equivalent
 if and only if
they have the same corner ranks.
\end{theorem}

\begin{definition}\rm
Two matrices $A,B\in\F^{m\times n}$
are called \jdef{lower equivalent} iff
$A=QB$
where $Q\in\F^{m\times m}$ is  invertible lower triangular.
Let $\FLAG{m}{k}$ denote the span of the last $k$ columns
of the $m\times m$ identity matrix, i.e. for $Y\in\F^{m\times 1}$
$$
   Y\in\FLAG{m}{k} \iff \entry_{1}(Y)=\cdots=\entry_{m-k}(Y)=0.
$$
Compare with~\ref{flag}. For $V\subseteq\F^{m\times 1}$
and $A\in\F^{m\times n}$ define
$$
   A^{-1}(V)=\{X\in \F^{n\times 1}: AX\in V\}.
$$
\end{definition}

\begin{theorem}
Two matrices $A$ and $B$ are
lower equivalent if and only if
$$
    A^{-1}\bigl(\FLAG{m}{k}\bigr) = B^{-1}\bigl(\FLAG{m}{k}\bigr)
$$
for $k=0,1,2,\ldots,m$.
\end{theorem}


\begin{definition}\rm
Two square matrices $A,B\in\F^{n\times n}$
are called \jdef{similar} iff there exists an invertible
matrix $P\in\F^{n\times n}$ such that
$$
   A=PBP^{-1}.
$$
\end{definition}

We restate Corollary~\ref{SIM-CHAR} so the reader can see the pattern.

\begin{theorem}
Two square matrices of the same size are similar
if and only if they have the same eigenranks (see~\ref{def:eigenrank}).
\end{theorem}


\begin{definition}\rm
Two square matrices $A,B\in\F^{n\times n}$
are called \jdef{unitarily similar}
iff there exists a unitary  matrix $P\in\F^{n\times n}$ such that
$$
      A=PBP^{-1}.
$$
A square matrix $A\in\F^{n\times n}$ is called \jdef{Hermitean}
iff it is equal to its conjugate transpose:
$$
      A=A\ctr.
$$
Theorem~\ref{thm:spectral} below states that a Hermitean
matrix is diagonalizable so that for each eigenvalue
the algebraic multiplicity and the geometric multiplicity
are the same.
\end{definition}

\begin{theorem} \label{the:usim}
Two  Hermitean matrices
of the same size are unitarily  similar
if and only if
they have the same eigenvalues each with the same multiplicity.
\end{theorem}



\begin{definition}\rm
Two matrices $A,B\in\F^{m\times n}$  of the same size  are called
\jdef{unitarily left equivalent}
iff there exists a unitary  matrix $Q\in\F^{m\times m}$ such that
$$
      A=QB.
$$
\end{definition}

\begin{theorem} \label{the:ule}
Two matrices $A$ and $B$ are unitarily left equivalent
if and only if
$A\ctr A=B\ctr B$.
\end{theorem}


\begin{remark}\rm
Note that the matrices $A\ctr A$ and $B\ctr B$ are Hermitean.
\end{remark}


\begin{definition}\rm
Suppose $A\in\F^{m\times n}$ and $m\ge n$.
A number $\sigma$ is called a \jdef{singular value}
of $A$ iff $\sigma\ge 0$, and
$\sigma^2$ is an eigenvalue of the Hermitean matrix
$ A\ctr A$, i.e.
there is a nonzero vector $X\in\F^{n\times 1}$
satisfying the condition
$$
   A\ctr AX =\sigma^2 X.
$$
Any  $X$ satisfying
this condition is called a \jdef{singular vector}
of $A$ corresponding to the singular value  $\sigma$.
The \jdef{multiplicity}
of a  singular value $\sigma$ of  $A$
is the dimension
 $$
  \dim\,\EIG_{\sigma^2}(A\ctr A)= \mathrm{nullity}(\sigma^2 I-A\ctr A)
$$
of the corresponding space of  singular vectors.
If $m<n$, the singular values and multiplicities of $A$ are, by definition,
the same as those of $A\ctr$.
\end{definition}


\begin{definition}\rm
Two matrices $A,B\in\F^{m\times n}$ are called
\jdef{unitarily equivalent} iff there exist
unitary  matrices $Q\in\F^{m\times m}$ and $P\in\F^{n\times n}$ such that
$$
A=QBP^{-1}.
$$
\end{definition}


\begin{theorem}\label{the:uequiv}
For two matrices $A$ and $B$ of the same size
the following are equivalent:
\begin{description}
\item[(1)]  $A$ and $B$ are unitarily equivalent.
\item[(2)]  $A\ctr A$ and $B\ctr B$ are unitarily similar.
\item[(3)]  $A$ and $B$ have the same singular values
            each with the same multiplicity.
\end{description}
\end{theorem}


\section{Normal Forms}

The theorems of this section all
have the form
%
\begin{quote}\em Every matrix is ``equivalent''
to a  matrix in ``normal form''.
\end{quote}
%
The notion of equivalence is one of the equivalence
relations of the previous section.
Often (but not always) the normal form is  unique.
Not all the  theorems in this section
can be easily proved from the material in these notes.


\begin{theorem}[Gauss Jordan Decomposition]
Any  $A\in\F^{m\times n}$ may be written in the form
$$
  A=QR
$$
where $Q\in\F^{m\times m}$ is  invertible
and $R\in\F^{m\times n}$
is in reduced row echelon form. (See~\ref{subsec:rref}.)
If $A=Q'R'$ is another
such decomposition, then $R=R'$.
\end{theorem}

\begin{theorem}
Any matrix $A\in\F^{m\times n}$ may be written in the form
$$
  A=TP^{-1}
$$
where $P\in\F^{n\times n}$ is  invertible
and $T\in\F^{m\times n}$
has a  reduced row echelon form.
If $A=T'{P'}^{-1}$ is another
such decomposition, then $T=T'$.
\end{theorem}

\begin{theorem}
Any matrix $A\in\F^{m\times n}$ may be written in the form
$$
  A=QDP^{-1}
$$
where $Q\in\F^{m\times m}$ and $P\in\F^{n\times n}$ are  invertible
and $D\in\F^{m\times n}$
is in zero-one normal form. (See~\ref{subsec:0-1}.)
If $A=Q'D'{P'}^{-1}$ is another
such decomposition, then $D=D'$.
\end{theorem}


\begin{definition}\rm
A matrix is in \jdef{rook normal form}
iff all its entries are either zero or one and it has at
most one nonzero entry
in every row and  at most one nonzero entry in every column.
\end{definition}

\begin{theorem}\label{ex:rook}
Any matrix $A\in\F^{m\times n}$ may be written in the form
$$
  A=QDP^{-1}
$$
where $Q\in\F^{m\times m}$ is  invertible lower triangular,
and $P\in\F^{n\times n}$ is uni-triangular,
and $D\in\F^{m\times n}$
is in rook normal form. If $A=Q'D'{P'}^{-1}$ is another
such decomposition, then $D=D'$.
\end{theorem}

\begin{definition}\rm
  A matrix $R\in\F^{m\times n}$
is said to be in \jdef{leading entry normal form}
  iff there is
a matrix $D\in\F^{m\times n}$ in rook normal form,
 such that  for each pair $(p,q)$ of
indices for which $\entry_{pq}(D)\ne 0$ we have
$$
\begin{array}{lll}
      \entry_{p,q}(R)  &= 1, & \\
      \entry_{p,j}(R)  &= 0  & \mbox{ for $j<q$,}\\
      \entry_{i,q}(R)  &= 0  & \mbox{ for $p<i$.}
  \end{array}
$$
For example, the $4\times 5$ matrix
$$
R=\Mat{ccccc}
   0 & 0 & 1 & * & * \\
   0 & 0 & 0 & * & * \\
   0 & 0 & 0 & 1 & * \\
   1 & * & 0 & 0 & *
\Rix
$$
is in leading entry normal form.
\end{definition}

\begin{theorem}
Any matrix $A\in\F^{m\times n}$ may be written in the form
$$
  A=LR
$$
where $L\in\F^{m\times m}$ is  invertible lower triangular
and $R\in\F^{m\times n}$
is in leading entry normal  form.
If $A=L'R'$ is another
such decomposition, then $R=R'$.
\end{theorem}

\begin{theorem}[Jordan Normal Form]
Any matrix $A\in\C^{n\times n}$ may
be written in the form
$$
     A = PJP^{-1}
$$
where $P\in\C^{n\times n}$ is invertible and
$J$ is in Jordan normal form.
\end{theorem}

\begin{remark}\rm
The  normal form $J$ is obviously not unique;
if $J$ is diagonal and $Q$ is a permutation matrix, then
$QJQ^{-1}$ is again diagonal with the diagonal entries occurring
in a different order.
\end{remark}

\begin{theorem}[Gram Schmidt Decomposition] \quad
Any  $A\in\F^{m\times n}$
with independent columns may be written in the form
$$
  A=BP^{-1}
$$
where $P\in\F^{n\times n}$ is  positive triangular
and $B\in\F^{m\times n}$ satisfies $B\ctr B=I_n$.
If $A=B'{P'}^{-1}$ is another
such decomposition, then $B=B'$.
\end{theorem}

\begin{theorem}[Spectral Theorem]\label{thm:spectral}
Assume $\F=\C$ or $\F=\R$.
Any Hermitean matrix $A\in\F^{n\times n}$ may be written in the form
$$
  A=PDP^{-1}
$$
where $P\in\F^{n\times n}$ is unitary and $D\in\R^{n\times n}$
is real and diagonal.
\end{theorem}



\begin{definition}\rm
An $m\times n$ matrix $R$ is in \jdef{positive row echelon form} iff
it is in  row echelon form (see~\ref{subsec:ref})
and in addition
all the leading entries are positive.
\end{definition}

\begin{theorem}[Householder Decomposition]
Assume $\F=\C$ or $\R$. Then
any matrix $A\in\F^{m\times n}$ may be written in the form
$$
  A=QR
$$
where $Q\in\F^{m\times m}$ is unitary and $R\in\F^{m\times n}$
is in positive row echelon form. If $A=Q'R'$ is another
such decomposition, then $R=R'$.
\end{theorem}

\begin{definition}\rm
A matrix $D$ is in \jdef{singular normal form} iff
$$
    D=\Mat{ll}
       \Delta            & 0_{r\times (n-r)}\\
       0_{(m-r)\times r} & 0_{(m-r)\times (n-r)}
    \Rix
$$
where
$$
\Delta=\diag(\sigma_1,\sigma_2,\ldots,\sigma_r)
$$
is an  $r\times r$ diagonal
matrix with positive entries $\sigma_j$ on the diagonal.
(Note that the diagonal entries of $\Delta$ (and $0$ if $r<m$) are
the singular values of $D$.)
\end{definition}

\begin{theorem}[Singular Value Decomposition]
Assume $\F=\C$ or $\F=\R$. Then
any matrix $A\in\F^{m\times n}$ may be written in the form
$$
  A=QDP^{-1}
$$
where $Q\in\F^{m\times m}$
and $P\in\F^{n\times n}$ are unitary
and $D\in\F^{m\times n}$
is in singular normal form.
\end{theorem}

\section{Exercises}


\begin{exercise}\rm Show that if $c=\cos\theta$ and $s=\sin\theta$,
then the matrix
$$
  Q=\Mat{rrr} c & s\\ -s & c \Rix
$$
is orthogonal and of determinant one.
\end{exercise}




\begin{exercise}\rm
Show that the  set of  matrices  $T\in\F^{(n+1)\times (n+1)}$ of form
$$
T=\Mat{lc} L & X_0\\ 0_{1\times n} & 1 \Rix \in\F^{(n+1)\times (n+1)}
$$
where $L\in\F^{n\times n}$ is invertible
and $X_0\in\F^{n\times 1}$ is a matrix group.
(It is called the \jdef{affine group}.)
\end{exercise}



\begin{exercise}\rm
Show that the  set of all matrices  $T$ of form
$$
T=\Mat{lc}L& X_0\\ 0_{1\times n} & 1 \Rix \in\R^{(n+1)\times (n+1)}
$$
where $L\in\R^{n\times n}$ is orthogonal  and $X_0\in\R^{n\times 1}$
is a matrix group.
(It is called the \jdef{Euclidean group}.)
\end{exercise}


\chapter{Index}
\newcommand{\indexentry}[2]{\par\noindent#1\hfill #2}
\input{index}
\end{document}
