\documentclass[runningheads]{ED2007}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{latexsym}
\usepackage{graphicx}
\usepackage[lucidasmallscale,nofontinfo]{lucimatx}
\begin{document}
\title*{Lower (Anti-)Robinson Rank Representations \protect\linebreak
for Symmetric Proximity Matrices}
\toctitle{Lower (Anti-) Robinson Rank Representations \protect\linebreak
for Symmetric Proximity Matrices}
\titlerunning{(Anti-) Robinson Representations}
\author{
Lawrence J. Hubert\inst{1}
\and
Hans-Friedrich K\"{o}hn\inst{2} }
\authorrunning{Hubert and K\"{o}hn}
\institute{
Department of Psychology, University of Illinois\\
603 East Daniel Street,
Champaign, Illinois 61820, {\it lhubert@cyrus.psych.uiuc.edu}
\and Department of Psychology, University of Illinois\\
603 East Daniel Street,
Champaign, Illinois 61820, {\it hkoehn@cyrus.psych.uiuc.edu}}
\maketitle
\begin{abstract}
Edwin Diday, some two decades ago, was among the first few individuals to recognize the importance of the
(anti-)Robinson form for representing a proximity matrix, and was the leader in suggesting how such matrices might be depicted graphically (as pyramids). We characterize the notions of an anti-Robinson (AR) and strongly anti-Robinson (SAR) matrix, and provide open-source M-files within a MATLAB environment to effect additive decompositions of a given proximity matrix into sums of AR (or SAR) matrices. We briefly introduce how the AR (or SAR) rank of a matrix might be specified.
\end{abstract}
\section{Introduction}
Various methods have been developed in the classification literature for representing the structure that may be present in a symmetric proximity matrix. The motivating bases for these strategies have been diverse, and include the reliance on spatial analogues (e.g., in multidimensional scaling), graph-theoretic concepts (e.g., in hierarchical clustering and the construction of additive trees), and order-constrained approximation matrices (e.g., matrices that satisfy the set of (anti-)Robinson (AR) order restrictions, characterized by a pattern of entries within each row and column never decreasing when moving away from the main diagonal in any direction; for historical precedents, see Robinson (1951)). It is within this last category of approximating a given proximity matrix by another that is order-constrained (and where, for convenience, proximity is now assumed keyed as a dissimilarity, so smaller values reflect more similar objects) in which Diday's contributions loom large. In the early 1980's and culminating in Diday (1986), he introduced the field to how (anti-)Robinson matrices may generally be represented through what are called pyramidal indices and their associated graphical display, or more broadly, to the relevance of the (graph-theoretic) literature on object seriation and its relation to the notion of an (anti-)Robinson form. We briefly review in this short paper a few of the advances in the last two decades, emphasizing, in particular, how sums of AR matrices might be identified and fitted through the minimization of a least-squares loss criterion. For a very comprehensive and current review of the whole area of hierarchical representations and their various extensions, the reader is referred to Barth\'{e}lemy, Brucker, and \mbox{Osswald} (2004).
\section{Some definitions}
Given an arbitrary symmetric $n \times n$ matrix, $\mathbf{A} = \{a_{ij}\}$, where the main diagonal entries are considered irrelevant and assumed to be zero (i.e., $a_{ii} = 0$ for $1 \le i \le n$), $\mathbf{A}$ is said to have an anti-Robinson (AR) form if after some reordering of the rows and columns of $\mathbf{A}$, the entries within each row and column have a distinctive pattern: moving away from the zero main diagonal entry within any row or any column, the entries never decrease. The entries in any AR matrix $\mathbf{A}$ can be reconstructed exactly through a collection of $M$ subsets of the original object set $S = \{O_{1},\ldots,O_{n}\}$, denoted by $S_{1},\ldots,S_{M}$, and where $M$ is determined by the particular pattern of tied entries, if any, in $\mathbf{A}$. These $M$ subsets have the following characteristics:
\smallskip
(i) each $S_{m}$, $1 \leq m \leq M$, consists of a sequence of (two or more) consecutive integers so that $M \leq n(n-1)/2$. (This bound holds because the number of different subsets having consecutive integers for any given fixed ordering is $n(n-1)/2$, and will be achieved if all the entries in the AR matrix $\mathbf{A}$ are distinct).
\smallskip
(ii) each $S_{m}$, $1 \leq m \leq M$, has a diameter, denoted by $d(S_{m})$, so that for all object pairs within $S_{m}$, the corresponding entries in $\mathbf{A}$ are less than or equal to the diameter. The subsets, $S_{1},\ldots,S_{M}$, can be assumed ordered as $d(S_{1}) \leq d(S_{2}) \leq \cdots \leq d(S_{M})$, and if $S_{m} \subseteq S_{m'}$, $d(S_{m}) \leq d(S_{m'})$.
\smallskip
(iii) each entry in $\mathbf{A}$ can be reconstructed from $d(S_{1}),\ldots,d(S_{M})$, i.e., for $1 \leq i,j \leq n$, \[ a_{ij} = \min_{1 \leq m \leq M} \{d(S_{m}) \mid O_{i}, O_{j} \in S_{m} \} ,\]
\noindent so that the minimum diameter for subsets containing an object pair $O_{i}, O_{j} \in S$ is equal to $a_{ij}$. Given $\mathbf{A}$, the collection of subsets $S_{1},\ldots,S_{M}$ and their diameters can be identified by inspection through the use of an increasing threshold that starts from the smallest entry in $\mathbf{A}$, and observing which subsets containing contiguous objects emerge from this process. The substantive interpretation of what $\mathbf{A}$ is depicting reduces to explaining why those subsets with the smallest diameters are so homogeneous.
\smallskip
If the matrix $\mathbf{A}$ has a somewhat more restrictive form than just being AR, and is also \emph{strongly} anti-Robinson (SAR), a convenient graphical representation can be given to the collection of AR reconstructive subsets $S_{1},\ldots,S_{M}$ and their diameters, and how they can serve to retrieve $\mathbf{A}$. Specifically, $\mathbf{A}$ is said to be strongly anti-Robinson (SAR) if (considering the above-diagonal entries of $\mathbf{A}$) whenever two entries in adjacent columns are equal ($a_{ij} = a_{i(j+1)}$), those in the same two adjacent columns in the previous row are also equal ($a_{(i-1)j} = a_{(i-1)(j+1)}$ for $1 \leq i-1 < j \leq n-1$); also, whenever two entries in adjacent rows are equal ($a_{ij} = a_{(i+1)j}$), those in the same two adjacent rows in the succeeding column are also equal ($a_{i(j+1)} = a_{(i+1)(j+1)}$ for $2 \leq i+1 < j \leq n-1$).
The reconstruction of an SAR matrix through the collection of consecutively defined object subsets, $S_{1},\ldots,S_{M}$, and their diameters, and how these serve to reconstruct $\mathbf{A}$ can be modeled graphically (see Figure 1). Internal nodes would be at a height equal to the diameter of the respective subset; the consecutive objects forming that subset are identifiable by downward paths from the internal nodes to the terminal nodes corresponding to the objects in $S = \{O_{1},\ldots,O_{n}\}$. An entry $a_{ij}$ in $\mathbf{A}$ can be reconstructed as the minimum node height of a subset for which a path can be constructed from $O_{i}$ up to that internal node and then back down to $O_{j}$.
As a few final introductory historical notes, there is now a rather extensive literature on graphically representing a matrix having an AR or SAR form. The reader interested in pursuing some of the relevant literature might begin with the earlier cited reference by Diday (1986) and his introduction to graphically representing an AR matrix by a `pyramid', and then continue with the review by Durand and Fichet (1988), who point out the necessity of strengthening the AR condition to one that is SAR if a consistent graphical (pyramidal) representation is to be possible with no unresolvable graphical anomalies. For further discussion and development of some of these representation issues, the reader is referred to Diatta and Fichet (1998), Critchley (1994), Critchley and Fichet (1994), and Mirkin (1996, Chapter 7).
\subsection{An illustrative numerical example}
The proximity matrix given in Table 1 was published by \emph{The New York Times} (July 2, 2005), and contains the percentages of non-unanimous cases in which the U.S. Supreme Court Justices \emph{dis}agreed from the 1994/95 term through 2003/04 (known as the Rehnquist Court). The (upper-triangular portion of the) dissimilarity matrix is given in the same row and column order as the \emph{Times} data set, with the justices ordered from ``liberal'' to ``conservative'':
\smallskip
\hspace{10ex} 1: John Paul Stevens (St)
\hspace{10ex} 2: Stephen G. Breyer (Br)
\hspace{10ex} 3: Ruth Bader Ginsburg (Gi)
\hspace{10ex} 4: David Souter (So)
\hspace{10ex} 5: Sandra Day O'Connor (Oc)
\hspace{10ex} 6: Anthony M. Kennedy (Ke)
\hspace{10ex} 7: William H. Rehnquist (Re)
\hspace{10ex} 8: Antonin Scalia (Sc)
\hspace{10ex} 9: Clarence Thomas (Th)
\noindent The lower-triangular portion of Table 1 is a best-fitting (least-squares) SAR matrix obtained with the MATLAB M-file \verb+sarobfnd.m+ mentioned in the next section. The variance-accounted-for is 98.62\%, so there is little residual variability left. A graphical representation is given in Figure 1; the `pyramidal' structure would be more apparent if the vertical lines were tilted slightly inward toward the internal nodes.
\begin{table}
\begin{center}
\begin{tabular}{c|ccccccccc}
& St & Br & Gi & So & Oc & Ke & Re & Sc & Th \\ \hline
1 St & .00 & .38 & .34 & .37 & .67 & .64 & .75 & .86 & .85 \\
2 Br & .36 & .00 & .28 & .29 & .45 & .53 & .57 & .75 & .76 \\
3 Gi & .36 & .28 & .00 & .22 & .53 & .51 & .57 & .72 & .74 \\
4 So & .37 & .29 & .22 & .00 & .45 & .50 & .56 & .69 & .71 \\
5 Oc & .66 & .49 & .49 & .45 & .00 & .33 & .29 & .46 & .46 \\
6 Ke & .70 & .55 & .55 & .53 & .31 & .00 & .23 & .42 & .41 \\
7 Re & .70 & .55 & .55 & .53 & .31 & .23 & .00 & .34 & .32 \\
8 Sc & .86 & .74 & .74 & .70 & .46 & .42 & .33 & .00 & .21 \\
9 Th & .86 & .74 & .74 & .70 & .46 & .42 & .33 & .21 & .00 \\
\end{tabular}
\end{center}
\caption{Dissimilarities among the nine Supreme Court justices above the diagonal; best-fitting SAR values below the diagonal.}
\end{table}
\begin{figure}
\begin{center}
\setlength{\unitlength}{.5pt}
\begin{picture}(500,1000)
\put(50,0){\makebox(0,0){St}}
\put(100,0){\makebox(0,0){Br}}
\put(150,0){\makebox(0,0){Gi}}
\put(200,0){\makebox(0,0){So}}
\put(250,0){\makebox(0,0){Oc}}
\put(300,0){\makebox(0,0){Ke}}
\put(350,0){\makebox(0,0){Re}}
\put(400,0){\makebox(0,0){Sc}}
\put(450,0){\makebox(0,0){Th}}
\put(50,50){\circle{20}}
\put(100,50){\circle{20}}
\put(150,50){\circle{20}}
\put(200,50){\circle{20}}
\put(250,50){\circle{20}}
\put(300,50){\circle{20}}
\put(350,50){\circle{20}}
\put(400,50){\circle{20}}
\put(450,50){\circle{20}}
\put(375,465){\circle*{20}}
\put(331.25,510){\circle*{20}}
\put(256.25,580){\circle*{20}}
\put(187.5,540){\circle*{20}}
\put(221.875,595){\circle*{20}}
\put(153.125,705){\circle*{20}}
\put(293.75,750){\circle*{20}}
\put(187.50,752.5){\circle*{20}}
\put(257.8125,792.5){\circle*{20}}
\put(425,260){\circle*{20}}
\put(175,270){\circle*{20}}
\put(325,280){\circle*{20}}
\put(125,330){\circle*{20}}
\put(225,500){\circle*{20}}
\put(87.5,410){\circle*{20}}
\put(150,340){\circle*{20}}
\put(118.75,420){\circle*{20}}
\put(287.5,360){\circle*{20}}
\put(222.65625,905){\circle*{20}}
\put(0,50){\line(0,1){900}}
\put(197.50,752.5){\line(0,1){152.5}}
\put(247.8125,792.5){\line(0,1){112.5}}
\put(335,280){\line(0,1){185}}
\put(415,260){\line(0,1){205}}
\put(297.5,360){\line(0,1){150}}
\put(365,465){\line(0,1){45}}
\put(235,500){\line(0,1){80}}
\put(277.5,360){\line(0,1){220}}
\put(160,340){\line(0,1){200}}
\put(215,500){\line(0,1){40}}
\put(197.5,540){\line(0,1){55}}
\put(246.25,580){\line(0,1){15}}
\put(128.75,420){\line(0,1){285}}
\put(177.50,540){\line(0,1){165}}
\put(266.25,580){\line(0,1){170}}
\put(321.25,510){\line(0,1){240}}
\put(163.125,705){\line(0,1){47.5}}
\put(212.875,595){\line(0,1){157.5}}
\put(231.875,595){\line(0,1){197.5}}
\put(283.75,750){\line(0,1){42.5}}
\put(440,50){\line(0,1){210}}
\put(410,50){\line(0,1){210}}
\put(190,50){\line(0,1){220}}
\put(160,50){\line(0,1){220}}
\put(340,50){\line(0,1){230}}
\put(310,50){\line(0,1){230}}
\put(140,50){\line(0,1){280}}
\put(110,50){\line(0,1){280}}
\put(240,50){\line(0,1){450}}
\put(210,50){\line(0,1){450}}
\put(60,50){\line(0,1){360}}
\put(115,330){\line(0,1){80}}
\put(135,330){\line(0,1){10}}
\put(165,270){\line(0,1){70}}
\put(97.5,410){\line(0,1){10}}
\put(140,340){\line(0,1){80}}
\put(260,50){\line(0,1){310}}
\put(315,280){\line(0,1){80}}
\put(410,260){\line(1,0){30}}
\put(160,270){\line(1,0){30}}
\put(310,280){\line(1,0){30}}
\put(110,330){\line(1,0){30}}
\put(210,500){\line(1,0){30}}
\put(60,410){\line(1,0){55}}
\put(135,340){\line(1,0){30}}
\put(97.5,420){\line(1,0){42.5}}
\put(260,360){\line(1,0){55}}
\put(335,465){\line(1,0){80}}
\put(297.5,510){\line(1,0){67.5}}
\put(235,580){\line(1,0){42.5}}
\put(160,540){\line(1,0){55}}
\put(197.5,595){\line(1,0){48.75}}
\put(128.75,705){\line(1,0){48.75}}
\put(266.25,750){\line(1,0){55}}
\put(163.125,752.5){\line(1,0){48.75}}
\put(231.875,792.5){\line(1,0){51.875}}
\put(197.50,905){\line(1,0){50.3125}}
\put(-50,260){\vector(1,0){50}}
\put(-90,265){\makebox(0,0)[b]{.21}}
\put(-50,270){\vector(1,0){50}}
\put(-60,275){\makebox(0,0)[b]{.22}}
\put(-50,280){\vector(1,0){50}}
\put(-90,285){\makebox(0,0)[b]{.23}}
\put(-50,340){\vector(1,0){50}}
\put(-90,345){\makebox(0,0)[b]{.29}}
\put(-50,360){\vector(1,0){50}}
\put(-50,365){\makebox(0,0)[b]{.31}}
\put(-50,410){\vector(1,0){50}}
\put(-50,415){\makebox(0,0)[b]{.36}}
\put(-50,330){\vector(1,0){50}}
\put(-50,335){\makebox(0,0)[b]{.28}}
\put(-50,420){\vector(1,0){50}}
\put(-50,425){\makebox(0,0)[b]{.37}}
\put(-50,465){\vector(1,0){50}}
\put(-50,470){\makebox(0,0)[b]{.42}}
\put(-50,500){\vector(1,0){50}}
\put(-60,505){\makebox(0,0)[b]{.45}}
\put(-50,510){\vector(1,0){50}}
\put(-90,515){\makebox(0,0)[b]{.46}}
\put(-50,540){\vector(1,0){50}}
\put(-50,545){\makebox(0,0)[b]{.49}}
\put(-50,580){\vector(1,0){50}}
\put(-50,585){\makebox(0,0)[b]{.53}}
\put(-50,595){\vector(1,0){50}}
\put(-50,600){\makebox(0,0)[b]{.55}}
\put(-50,705){\vector(1,0){50}}
\put(-50,710){\makebox(0,0)[b]{.66}}
\put(-50,750){\vector(1,0){50}}
\put(-50,755){\makebox(0,0)[b]{.70}}
\put(-50,792.5){\vector(1,0){50}}
\put(-50,797.5){\makebox(0,0)[b]{.74}}
\put(-50,905){\vector(1,0){50}}
\put(-50,910){\makebox(0,0)[b]{.86}}
\end{picture}
\end{center}
\caption{A `pyramidal' representation for the SAR matrix given in Table 1 having VAF of 98.62\%}
\end{figure}
\section{Computational procedures within MATLAB}
The recent monograph by Hubert, Arabie, and Meulman (2006) provides a collection of open-source M-files (i.e., the code is freely available) within a MATLAB environment to effect a variety of least-squares structural representations for a proximity matrix. Among these are strategies to search for good-fitting AR and SAR forms, including additive decompositions of up to two such structures for a single given proximity matrix. We do not give the algorithmic details here on how these M-files are built, and instead, refer the reader to the Hubert et al.\ (2006) monograph. We have collected all the relevant M-files together at \verb+http://cda.psych.uiuc.edu/diday_mfiles+. The three M-files, \verb+arobfnd.m+, \verb+biarobfnd.m+, \verb+triarobfnd.m+, fit respectively, one, two, and three AR matrices to a given input proximity matrix; the three M-files, \verb+sarobfnd.m+, \verb+bisarobfnd.m+, \verb+trisarobfnd.m+, are for the strengthened SAR forms. The two files, \verb+triarobfnd.m+ and \verb+trisarobfnd.m+, are unique to this site, and should provide a programming template to extend easily, when needed, the additive decomposition to four or more matrices.
We give the help header for the representative file \verb+triarobfnd.m+ below, along with an application to a randomly constructed $10 \times 10$ proximity matrix (obtained from the contributed M-file \verb+randprox.m+). As can be seen, the (random) matrix is perfectly reconstructed by the three AR matrices (a variance-accounted-for of 1.0 is achieved). For example, the (4,6) entry in \verb+prox+ of .7948 is reconstructed based on the given output permutations, \verb+outpermone+, \verb+outpermtwo+, and \verb+outpermthree+; explicitly, we use the (4,10) entry in \verb+targone+ (.8290), the (8,9) entry in \verb+targtwo+ ($-$.0515), and the (3,9) entry in \verb+targthree+ (.0173): .7948 = .8290 + ($-$.0515) + (.0173).
\begin{verbatim}
>> help triarobfnd
TRIAROBFND finds and fits the sum of three anti-Robinson
matrices using iterative projection to a symmetric
proximity matrix in the $L_{2}$-norm based on permutations
identified through the use of iterative quadratic assignment.
syntax: [find,vaf,targone,targtwo,targthree,outpermone, ...
outpermtwo,outpermthree] = triarobfnd(prox,inperm,kblock)
PROX is the input proximity matrix ($n \times n$ with a zero
main diagonal and a dissimilarity interpretation);
INPERM is a given starting permutation of the first $n$
integers; FIND is the least-squares optimal matrix (with
variance-accounted-for of VAF to PROX and is the sum of the
three anti-Robinson matrices TARGONE, TARGTWO, and TARGTHREE
based on the three row and column object orderings given by
the ending permutations OUTPERMONE, OUTPERMTWO, and
OUTPERMTHREE. KBLOCK defines the block size in the use of
the iterative quadratic assignment routine.
\end{verbatim}
\scriptsize
\begin{verbatim}
>> prox = randprox(10)
prox =
0 0.6979 0.3784 0.8600 0.8537 0.5936 0.4966 0.8998 0.8216 0.6449
0.6979 0 0.8180 0.6602 0.3420 0.2897 0.3412 0.5341 0.7271 0.3093
0.3784 0.8180 0 0.8385 0.5681 0.3704 0.7027 0.5466 0.4449 0.6946
0.8600 0.6602 0.8385 0 0.6213 0.7948 0.9568 0.5226 0.8801 0.1730
0.8537 0.3420 0.5681 0.6213 0 0.9797 0.2714 0.2523 0.8757 0.7373
0.5936 0.2897 0.3704 0.7948 0.9797 0 0.1365 0.0118 0.8939 0.1991
0.4966 0.3412 0.7027 0.9568 0.2714 0.1365 0 0.2987 0.6614 0.2844
0.8998 0.5341 0.5466 0.5226 0.2523 0.0118 0.2987 0 0.4692 0.0648
0.8216 0.7271 0.4449 0.8801 0.8757 0.8939 0.6614 0.4692 0 0.9883
0.6449 0.3093 0.6946 0.1730 0.7373 0.1991 0.2844 0.0648 0.9883 0
>> [find,vaf,targone,targtwo,targthree, ...
outpermone,outpermtwo,outpermthree] = ...
triarobfnd(prox,randperm(10),2)
find =
0 0.6979 0.3784 0.8600 0.8536 0.5936 0.4966 0.8998 0.8216 0.6449
0.6979 0 0.8180 0.6602 0.3420 0.2897 0.3412 0.5341 0.7271 0.3093
0.3784 0.8180 0 0.8385 0.5681 0.3704 0.7027 0.5466 0.4449 0.6946
0.8600 0.6602 0.8385 0 0.6213 0.7948 0.9568 0.5226 0.8801 0.1730
0.8536 0.3420 0.5681 0.6213 0 0.9797 0.2714 0.2523 0.8757 0.7373
0.5936 0.2897 0.3704 0.7948 0.9797 0 0.1365 0.0118 0.8939 0.1991
0.4966 0.3412 0.7027 0.9568 0.2714 0.1365 0 0.2987 0.6614 0.2844
0.8998 0.5341 0.5466 0.5226 0.2523 0.0118 0.2987 0 0.4692 0.0648
0.8216 0.7271 0.4449 0.8801 0.8757 0.8939 0.6614 0.4692 0 0.9883
0.6449 0.3093 0.6946 0.1730 0.7373 0.1991 0.2844 0.0648 0.9883 0
vaf =
1.0000
targone =
0 0.6591 0.6591 0.6601 0.6601 0.7509 0.7754 0.7755 0.8757 0.8801
0.6591 0 0.3569 0.5849 0.6601 0.7509 0.7509 0.7755 0.8290 0.8290
0.6591 0.3569 0 0.3704 0.6601 0.6720 0.6851 0.7755 0.7840 0.8290
0.6601 0.5849 0.3704 0 0.1030 0.2063 0.2661 0.3883 0.7840 0.8290
0.6601 0.6601 0.6601 0.1030 0 0.2063 0.2418 0.3883 0.4269 0.8290
0.7509 0.7509 0.6720 0.2063 0.2063 0 0.0283 0.3290 0.3290 0.6651
0.7754 0.7509 0.6851 0.2661 0.2418 0.0283 0 0.2702 0.3290 0.5290
0.7755 0.7755 0.7755 0.3883 0.3883 0.3290 0.2702 0 0.2963 0.5263
0.8757 0.8290 0.7840 0.7840 0.4269 0.3290 0.3290 0.2963 0 0.5263
0.8801 0.8290 0.8290 0.8290 0.8290 0.6651 0.5290 0.5263 0.5263 0
targtwo =
0 -0.1489 0.0312 0.0312 0.0312 0.0492 0.0578 0.1813 0.2296 0.4148
-0.1489 0 -0.1392 -0.0471 -0.0333 0.0492 0.0578 0.0578 0.1344 0.1344
0.0312 -0.1392 0 -0.0537 -0.0333 0.0281 0.0376 0.0376 0.0376 0.0620
0.0312 -0.0471 -0.0537 0 -0.2446 0.0281 0.0376 0.0376 0.0376 0.0620
0.0312 -0.0333 -0.0333 -0.2446 0 -0.2488 -0.1600 0.0376 0.0376 0.0620
0.0492 0.0492 0.0281 0.0281 -0.2488 0 -0.1600 -0.0080 0.0160 0.0160
0.0578 0.0578 0.0376 0.0376 -0.1600 -0.1600 0 -0.3058 -0.0080 0
0.1813 0.0578 0.0376 0.0376 0.0376 -0.0080 -0.3058 0 -0.0515 -0.0426
0.2296 0.1344 0.0376 0.0376 0.0376 0.0160 -0.0080 -0.0515 0 -0.3495
0.4148 0.1344 0.0620 0.0620 0.0620 0.0160 0 -0.0426 -0.3495 0
targthree =
0 -0.1217 -0.0376 -0.0312 0.0346 0.0346 0.1510 0.1958 0.1962 0.1962
-0.1217 0 -0.1345 -0.1345 0.0346 0.0346 0.0364 0.1113 0.1113 0.1675
-0.0376 -0.1345 0 -0.1345 -0.0065 -0.0065 -0.0065 -0.0065 0.0173 0.0964
-0.0312 -0.1345 -0.1345 0 -0.2651 -0.0065 -0.0065 -0.0065 0.0145 0.0145
0.0346 0.0346 -0.0065 -0.2651 0 -0.0065 -0.0065 -0.0065 0.0080 0.0145
0.0346 0.0346 -0.0065 -0.0065 -0.0065 0 -0.0917 -0.0243 -0.0243 0
0.1510 0.0364 -0.0065 -0.0065 -0.0065 -0.0917 0 -0.1680 -0.0243 -0.0229
0.1958 0.1113 -0.0065 -0.0065 -0.0065 -0.0243 -0.1680 0 -0.0289 -0.0239
0.1962 0.1113 0.0173 0.0145 0.0080 -0.0243 -0.0243 -0.0289 0 -0.1362
0.1962 0.1675 0.0964 0.0145 0.0145 0 -0.0229 -0.0239 -0.1362 0
outpermone =
9 1 3 6 7 8 10 2 5 4
outpermtwo =
5 7 1 2 9 3 8 6 4 10
outpermthree =
9 8 4 5 3 7 10 1 6 2
\end{verbatim}
\normalsize
\section{The concept of minimum AR (or SAR) matrix rank}
Based on the type of M-file (\verb+triarobfnd.m+) illustrated in the previous section, a rather natural question arises as to the number of AR (or SAR) components necessary to exhaust perfectly any given proximity matrix. The minimum such number will be referred to as the AR (or SAR) rank of a symmetric proximity matrix. As we saw for the random $10 \times 10$ matrix in the example of the last section, we usually can do quite well with many fewer components than the order of the matrix. Although we might expect this to be true for a data matrix that is well-structured (and where two or three AR or SAR components are all that is needed to effectively exhaust the given proximity matrix), the same also appears to hold for merely randomly structured matrices.
To make this last point even more clear, a small Monte Carlo analysis was carried out in which 1000 random proximity matrices (with entries uniform on (0,1)), of sizes 10, 20, 30, 40, and 50, were approximated by sums of AR matrices to the point where at least a VAF of 99\% was achieved. The frequency results (out of 1000 such randomly generated matrices) are tabulated below:
\smallskip
\begin{tabular}{cccccccccc}
& \multicolumn{9}{c}{Number AR Components Needed} \\ \hline
\multicolumn{1}{c}{Matrix Size} & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 \\ \hline
10 & 37 & 959 & 4 & & & & & & \\
20 & & & 316 & 684 & & & & & \\
30 & & & & & 994 & 6 & & & \\
40 & & & & & & 205 & 795 & & \\
50 & & & & & & & & 995 & 5 \\
\end{tabular}
\smallskip
\noindent Figure 2 illustrates, by means of box-and-whisker plots, the incremental gain in VAF as a function of the number of fitted AR components.
\begin{center}
\begin{figure}[!h]
\setlength{\parindent}{0in}
\setlength{\parskip}{12pt}
\hfil\scalebox{.28}{\includegraphics{AR_Monte_Carlo_box_plots_all.eps}}\hfil
\caption{Incremental VAF Gains for Differing Numbers of AR Components}
\end{figure}
\end{center}
\clearpage
\begin{thebibliography}{1}
\item[]
BARTH\'{E}LEMY, J.-P., BRUCKER, F. and OSSWALD, C. (2004):
Combinatorial optimization and hierarchical classifications.
\emph{4OR: A Quarterly Journal of Operations Research 2 (3), 179--219}.
\item[]
CRITCHLEY, F. (1994):
On exchangeability-based equivalence relations induced by strongly Robinson and, in particular, by quadripolar Robinson dissimilarity matrices.
In: B. van Cutsem (Ed.):
\emph{Classification and Dissimilarity Analysis}.
Springer-Verlag, New York, 173--199.
\item[]
CRITCHLEY, F. and FICHET, B. (1994):
The partial order by inclusion of the principal classes of dissimilarity on a finite set, and some of their basic properties.
In: B. van Cutsem (Ed.):
\emph{Classification and Dissimilarity Analysis}.
Springer-Verlag, New York, 5--65.
\item[]
DIATTA, J. and FICHET, B. (1998):
Quasi-ultrametrics and their 2-ball hypergraphs.
\emph{Discrete Mathematics 192 (1-3), 87--102}.
\item[]
DIDAY, E. (1986):
Orders and overlapping clusters by pyramids.
In: J. De Leeuw, W. Heiser, J. Meulman and F. Critchley (Eds.):
\emph{Multidimensional Data Analysis}.
DSWO Press, Leiden, 201--234.
\item[]
DURAND, C. and FICHET, B. (1988):
One-to-one correspondences in pyramidal representations: A unified approach.
In: H. H. Bock (Ed.):
\emph{Classification and Related Methods of Data Analysis}.
North-Holland, Amsterdam, 85--90.
\item[]
HUBERT, L., ARABIE, P. and MEULMAN, J. (2006):
\emph{The Structural Representation of Proximity Matrices with MATLAB}.
SIAM, Philadelphia.
\item[]
MIRKIN, B. (1996):
\emph{Mathematical Classification and Clustering}.
Kluwer, Dordrecht.
\item[]
ROBINSON, W. S. (1951):
A method for chronologically ordering archaeological deposits.
\emph{American Antiquity 16 (4), 293--301}.
\end{thebibliography}
%\begin{figure}
%\caption{Incremental VAF Gains for Differing Numbers of AR Components}
%\resizebox{\textwidth}{!}(\includegraphics(AR_Monte_Carlo_box_plots_all.eps))
%\end{figure}
\end{document}