# HG changeset patch # User meillo@marmaro.de # Date 1234129902 -3600 # Node ID e57129f57faa8a13e98e88ccdc7683c4cc2e5ef0 # Parent d6ff5728dcd10f5b4b0ce6372facc29287a33fdb finished the indexing in a huge last effort diff -r d6ff5728dcd1 -r e57129f57faa thesis/scripts/improve-index.awk --- a/thesis/scripts/improve-index.awk Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/scripts/improve-index.awk Sun Feb 08 22:51:42 2009 +0100 @@ -2,16 +2,50 @@ # # improve the index + BEGIN{ - e["mta"] = "mail transfer agent (\\NAME{MTA})"; - e["mua"] = "mail user agent (\\NAME{MUA})"; - e["mda"] = "mail delivery agent (\\NAME{MUA})"; - e["smtp"] = "simple mail transfer protocol (\\NAME{SMTP})"; - e["ietf"] = "Internet Engineering Task Force (\\NAME{IETF})"; + + # special formatting for entries + e["mta"] = "Mail Transfer Agent (\\NAME{MTA})"; + e["mua"] = "Mail User Agent (\\NAME{MUA})"; + e["mda"] = "Mail Delivery Agent (\\NAME{MDA})"; + e["smtp"] = "Simple Mail Transfer Protocol (\\NAME{SMTP})"; + e["ietf"] = "ietf@\\NAME{IETF}"; e["debian"] = "Debian"; e["ascii"] = "ascii@\\NAME{ASCII}"; e["gpl"] = "General Public License (\\NAME{GPL})"; + e["auth"] = "authentication"; + e["bsd"] = "Berkeley Software Distribution (\\NAME{BSD})"; + e["c"] = "C programming language"; + e["changing ip addresses"] = "changing \\NAME{IP} addresses"; + e["cyrus sasl"] = "cyrus \\NAME{SASL}"; + e["dns blacklist"] = "dns blacklist@\\NAME{DNS} blacklist"; + e["dynamic dns"] = "dynamic \\NAME{DNS}"; + e["enc"] = "encryption"; + e["fax"] = "telefax"; + e["imap"] = "imap@\\NAME{IMAP}"; + e["ipc"] = "Inter-Process Communication (\\NAME{IPC})"; + e["isp"] = "Internet Service Provider (\\NAME{ISP})"; + e["mime"] = "mime@\\NAME{MIME}"; + e["mmdf"] = "mmdf@\\NAME{MMDF}"; + e["pam"] = "pam@\\NAME{PAM}"; + e["pop3"] = "pop3@\\NAME{POP3}"; + e["relay-only mta"] = "relay-only \\NAME{MTA}"; + e["rfc"] = "Request for Comments (\\NAME{RFC})"; + e["sasl"] = "sasl@\\NAME{SASL}"; + e["smtps"] = "smtps@\\NAME{SMTPS}"; + 
e["starttls"] = "starttls@\\NAME{STARTTLS}"; + e["swot analysis"] = "swot analysis@\\NAME{SWOT} analysis"; + e["tcp socket"] = "tcp socket@\\NAME{TCP} socket"; + e["tcp wrapper"] = "tcp wrapper@\\NAME{TCP} Wrapper"; + e["tls"] = "Transport Layer Security (\\NAME{TLS})"; + e["uc"] = "Unified Communication"; + e["um"] = "Unified Messaging"; + e["uucp"] = "uucp@\\NAME{UUCP}"; + e["setuid"] = "setuid/setgid"; + + # persons e["Oliver Kurth"] = e["Kurth"] = "Kurth@\\textsc{Kurth, Oliver}"; e["Adam Back"] = e["Back"] = "Back@\\textsc{Back, Adam}"; e["Eric Allman"] = e["Allman"] = "Allman@\\textsc{Allman, Eric}"; @@ -49,19 +83,72 @@ e["Diomidis Spinellis"] = e["Spinellis"] = "Spinellis@\\textsc{Spinellis, Diomidis}"; e["Andrew S. Tanenbaum"] = e["Tanenbaum"] = "Tanenbaum@\\textsc{Tanenbaum, Andrew S.}"; e["Kenneth R. van Wyk"] = e["van Wyk"] = "van Wyk@\\textsc{van Wyk, Kenneth R.}"; + e["Christian Langbein"] = e["Langbein"] = "Langbein@\\textsc{Langbein, Christian}"; + e["Dad"] = "Schnalke@\\textsc{Schnalke, R\\\"udiger}"; + e["Hans-J\\\"org Schaaf"] = e["Schaaf"] = "Schaaf@\\textsc{Schaaf, Hans-J\\\"org}"; + e["Henry Atting"] = e["Atting"] = "Atting@\\textsc{Atting, Henry}"; + e["Heraclitus"] = "Heraclitus@\\textsc{Heraclitus}"; + e["James Stenard"] = e["Stenard"] = "Stenard@\\textsc{Stenard, James}"; + e["Joachim Breitner"] = e["Breitner"] = "Breitner@\\textsc{Breitner, Joachim}"; + e["Jochen Roth"] = e["Roth"] = "Roth@\\textsc{Roth, Jochen}"; + e["Julian Forster"] = e["Forster"] = "Forster@\\textsc{Forster, Julian}"; + e["Lydi"] = "Steffan@\\textsc{Steffan, Lydi}"; + e["Marc Geis"] = e["Geis"] = "Geis@\\textsc{Geis, Marc}"; + e["Markus Sch\\\"affter"] = e["Sch\\\"affter"] = "Schaeffter@\\textsc{Sch\\\"affter, Markus}"; + e["Roger Schietzel"] = e["Schietzel"] = "Schietzel@\\textsc{Schietzel, Roger}"; + e["Stephen C. 
Johnson"] = e["Johnson"] = "Johnson@\\textsc{Johnson, Stephen C.}"; e["Wietse Venema"] = e["Venema"] = "Venema@\\textsc{Venema, Wietse}"; + e["Volkmar Kese"] = e["Kese"] = "Kese@\\textsc{Kese, Volkmar}"; + + + # subentries + s["sendmailx"] = "sendmail X"; + s["meta1"] = "MeTA1"; + s["smtp-after-pop"] = "\\NAME{SMTP}-after-\\NAME{POP}"; + s["smtp-auth"] = "\\NAME{SMTP-AUTH}"; + + + # aliases + a["ifdef"] = "ifdef|see{conditional compilation}"; + a["envelope"] = "envelope|see{mail message}"; + a["header"] = "header|see{mail message}"; + a["body"] = "body|see{mail message}"; + a["ssl"] = "ssl@\\NAME{SSL}|see{\\NAME{TLS}}"; + a["forwarder"] = "forwarder|see{relay-only \\NAME{MTA}}"; + a["junk mail"] = "junk mail|see{spam}"; + a["fax"] = "fax|see{telefax}"; + } { - cur = $0 - gsub(/\\nobreakspace \{\}/, " ", cur); - gsub(/\\ /, " ", cur); - sub(/[^{]*{/, "", cur); - sub(/[}!].*/, "", cur); - if (e[cur]) { - sub(/{[^}!]*/, "{" e[cur]); + gsub(/\\ /, " "); + gsub(/\\nobreakspace \{\}/, " "); + gsub(/\\discretionary \{-\}\{\}\{\}/, ""); + + entry = $0 + sub(/[^{]*{/, "", entry); + sub(/[}!].*/, "", entry); + if (e[entry]) { + sub(/{[^}!]*/, "{" e[entry]); } + + subentry = $0 + sub(/[^!]*!/, "", subentry); + sub(/}.*/, "", subentry); + if (s[subentry]) { + sub(/![^}]*/, "!" s[subentry]); + } + print; } + + + +END { + # aliases + for (i in a) { + print "\\indexentry{" a[i] "}{0}"; + } +} diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/0-preface.tex --- a/thesis/tex/0-preface.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/0-preface.tex Sun Feb 08 22:51:42 2009 +0100 @@ -3,8 +3,10 @@ \addcontentsline{toc}{section}{Preface} This thesis is about \masqmail, a small mail transfer agent for workstations and home networks. In October 2007 I had chosen \masqmail\ for my machines because of its small size though it was a ``real'' mail transfer agent. \masqmail\ served me well since then and I have found no reasons to change. 
+\index{masqmail} Unfortunately, the \masqmail\ package in \name{Debian}, which is my preferred \NAME{GNU}/Li\-nux distribution, is unmaintained since the beginning of 2008. Unmaintained packages are likely to get dropped out of a distribution if critical bugs appear in them. Although \masqmail\ had no critical bugs, this was a situation I definitely wanted to prevent. +\index{Debian!masqmail package} Using my diploma thesis as a ``power-start'' for maintaining and developing \masqmail\ in the future was a great idea. As it came to my mind I knew this is the thing I \emph{wanted} to do. --- I did it! :-) @@ -21,6 +23,8 @@ This document is primary written with an audience of \masqmail\ developers and developers of other mail transfer agents in mind. But users of \masqmail\ and everyone who is interested in email systems in general may find this thesis an interesting literature, too. However, at least basic knowledge about Unix and C programming is a prerequisite for chapters three, four, and five. \person{Kernighan} and \person{Pike}'s ``The \NAME{UNIX} Programming Environment'' \cite{kernighan84} is a valuable source to gain information about Unix. Programming in the C language is best learned from \person{Kernighan} and \person{Ritchie}'s ``The C Programming Language'' \cite{k&r}. +\index{c} +\index{Unix} @@ -71,6 +75,7 @@ \item \name{Request for Comments} are those documents that define the Internet. They are referenced directly by their unique number. For instance: \RFC\,821. \end{enumerate} +\index{rfc} The Bibliography is located at the end of the thesis. It also includes a list of the relevant \RFC{}s and how they can be retrieved. 
diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/1-Introduction.tex --- a/thesis/tex/1-Introduction.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/1-Introduction.tex Sun Feb 08 22:51:42 2009 +0100 @@ -9,7 +9,9 @@ \section{Email prerequisites} -Electronic mail is a service on the Internet and thus, like other Internet services, defined and standardized by \name{Requests For Comments}\index{rfc} (short: \RFC{}s\index{rfc}) under management of the \name{Internet Engineering Task Force}\index{ietf} (short: \NAME{IETF}). \RFC{}s are highly technical documents and it is not required that the readers of this thesis are familiar with them. +Electronic mail is a service on the Internet and thus, like other Internet services, defined and standardized by \name{Requests For Comments} (short: \RFC{}s) under management of the \name{Internet Engineering Task Force} (short: \NAME{IETF}). \RFC{}s are highly technical documents and it is not required that the readers of this thesis are familiar with them. +\index{ietf} +\index{rfc} This section gives an introduction into the basic internals of the email system in a low-technical language. It is intended to make the reader familiar with the essential concepts of email as they are essential throughout the thesis. @@ -32,6 +34,7 @@ \index{mda} \name{Mail Delivery Agents} correspond to postmen in the real world. They receive mail, destined to recipients they are responsible for, from an \MTA, and deliver it to the mailboxes of those recipients. Many \MTA{}s include an own \MDA{}, but independent ones exist: \name{procmail} and \name{maildrop} are examples. \end{description} +\index{procmail} \begin{figure} \begin{center} @@ -49,35 +52,49 @@ \subsubsection{Mail transfer with SMTP} \label{smtp-intro} -Today most of the email is transferred using the \name{Simple Mail Transfer Protocol}\index{smtp} (short: \SMTP), which is defined in \RFC\,821 and the successors \RFC\,2821 and \RFC\,5321. 
A good entry point for further information is \citeweb{wikipedia:smtp}. +Today most of the email is transferred using the \name{Simple Mail Transfer Protocol} (short: \SMTP), which is defined in \RFC\,821 and the successors \RFC\,2821 and \RFC\,5321. A good entry point for further information is \citeweb{wikipedia:smtp}. +\index{smtp} +\index{rfc} -A selection of important concepts of \SMTP\index{smtp!concepts of} is explained here. +A selection of important concepts of \SMTP\ is explained here. +\index{smtp!concepts of} -First the \name{store-and-forward}\index{smtp!store-and-forward} transfer concept. This means mail messages are sent from \MTA\ to \MTA, until the final \MTA\ (the one which is responsible for the recipient) is reached. The message is stored for some time on each \MTA, until it is forwarded to the next \MTA. +First the \name{store-and-forward} transfer concept. This means mail messages are sent from \MTA\ to \MTA, until the final \MTA\ (the one which is responsible for the recipient) is reached. The message is stored for some time on each \MTA, until it is forwarded to the next \MTA. +\index{smtp!store-and-forward} -This leads to the concept of \name{responsibility}\index{smtp!responsibility}. A mail message is always in the responsibility of one system. First it is the \MUA\index{mua}. When it is transferred to an \MTA, this \MTA\ takes over the responsibility for the message, too. The \MUA{} can then delete its copy of the message. This is the same for each transfer---from \MTA\ to \MTA\ and finally from \MTA\ to the \MDA{}---the message gets transferred and if the transfer was successful, the responsibility for the message is transferred as well. The responsibility chain ends at a user's mailbox where he himself has control on the message. +This leads to the concept of \name{responsibility}. A mail message is always in the responsibility of one system. First it is the \MUA. 
When it is transferred to an \MTA, this \MTA\ takes over the responsibility for the message, too. The \MUA{} can then delete its copy of the message. This is the same for each transfer---from \MTA\ to \MTA\ and finally from \MTA\ to the \MDA{}---the message gets transferred and if the transfer was successful, the responsibility for the message is transferred as well. The responsibility chain ends at a user's mailbox where he himself has control on the message. +\index{mua} +\index{mda} +\index{smtp!responsibility} -A third concept is about failure handling. At any step on the way an \MTA\ may receive a message it is unable to handle. In such a case this receiving \MTA\ will \name{reject}\index{smtp!rejecting} the message before it takes responsibility for it. The sending \MTA\ still has responsibility for the message and may try other ways for sending the message. If none succeeds the \MTA\ will send a \name{bounce message}\index{smtp!bouncing} back to the original sender with information on the type of failure. Bounces are only sent if the failure is expected to be permanent or if the transfer still was unsuccessful after many tries. +A third concept is about failure handling. At any step on the way an \MTA\ may receive a message it is unable to handle. In such a case this receiving \MTA\ will \name{reject} the message before it takes responsibility for it. The sending \MTA\ still has responsibility for the message and may try other ways for sending the message. If none succeeds the \MTA\ will send a \name{bounce message} back to the original sender with information on the type of failure. Bounces are only sent if the failure is expected to be permanent or if the transfer still was unsuccessful after many tries. +\index{smtp!bouncing} +\index{smtp!rejecting} \subsubsection{Mail messages} -Mail messages\index{mail message} consist of text in a specific format. This format is specified in \RFC\,822, and the successors \RFC\,2822 and \RFC\,5322. 
+Mail messages consist of text in a specific format. This format is specified in \RFC\,822, and the successors \RFC\,2822 and \RFC\,5322. +\index{mail message} +\index{rfc} -A message has two parts, the \name{header}\index{mail message!header} and the \name{body}\index{mail message!body}. The header of an email message is similar to the header of a (formal) letter. It spans the first lines of the message up to the first empty line. The header consists of several lines, called \name{header lines}\index{mail message!header lines} or simply \name{headers}. They specify the sender, the recipient(s), the date, and possibly further information. Their order is irrelevant. Headers are named like the colon-separated start of those lines, for example the ``\texttt{Date:}'' header. A user may write the header himself but normally the \MUA{} does this job. +A message has two parts, the \name{header} and the \name{body}. The header of an email message is similar to the header of a (formal) letter. It spans the first lines of the message up to the first empty line. The header consists of several lines, called \name{header lines} or simply \name{headers}. They specify the sender, the recipient(s), the date, and possibly further information. Their order is irrelevant. Headers are named like the colon-separated start of those lines, for example the ``\texttt{Date:}'' header. A user may write the header himself but normally the \MUA{} does this job. -The body is the payload\index{mail message!payload} of the message. It is under full control of the user. From the view point of the \SMTP\ protocol, it must consist of only 7-bit \NAME{ASCII}\index{ascii} text. But arbitrary content can be included by encoding it to 7-bit \NAME{ASCII}. \NAME{MIME}\index{mime} is the common \SMTP\ extension to handle such conversion automatically in \MUA{}s. +The body is the payload of the message. It is under full control of the user. 
From the view point of the \SMTP\ protocol, it must consist of only 7-bit \NAME{ASCII} text. But arbitrary content can be included by encoding it to 7-bit \NAME{ASCII}. \NAME{MIME} is the common \SMTP\ extension to handle such conversion automatically in \MUA{}s. +\index{mua} +\index{mime} +\index{ascii} Following is a sample mail message with four header lines (\texttt{From:}, \texttt{To:}, \texttt{Date:}, and \texttt{Subject:}) and three lines of message body. -\codeinput{input/sample-email.txt}\index{mail message!example} +\codeinput{input/sample-email.txt} -Email messages are put into \name{envelopes}\index{mail message!envelope} for transfer. This concept is also derived from the real world so it is easy to understand. The envelope is used to route the message from sender to recipient. It contains the sender's address and addresses of one or more recipients. Envelopes are generated by \MTA{}s, usually from mail header data. The user has not to deal with them. +Email messages are put into \name{envelopes} for transfer. This concept is also derived from the real world so it is easy to understand. The envelope is used to route the message from sender to recipient. It contains the sender's address and addresses of one or more recipients. Envelopes are generated by \MTA{}s, usually from mail header data. The user has not to deal with them. Each \MTA\ on the way reads envelopes it receives and generates new ones. If a message has recipients on different hosts, then the message gets copied and sent within multiple envelopes, one for each host. -The sample message would lead to two envelopes\index{mail message!more envelopes}, one from \name{markus@host01} to \name{alice@host02}, the other from \name{markus@host01} to \name{bob@host03}. Both envelopes would contain the same message. +The sample message would lead to two envelopes, one from \name{markus@host01} to \name{alice@host02}, the other from \name{markus@host01} to \name{bob@host03}. 
Both envelopes would contain the same message. @@ -87,15 +104,20 @@ \section{The \masqmail\ project} \label{sec:masqmail} -The \masqmail\ project\index{masqmail!the project} was initiated by \person{Oliver Kurth} in 1999. His aim was to create a small \MTA\ that is especially focused on computers with dial-up Internet connections\index{dial-up}. Throughout the next four years he worked steadily on it, releasing new versions every few weeks. During the active phase of development 53 version have been released. In average, this is a new version every 20 days. +The \masqmail\ project was initiated by \person{Oliver Kurth} in 1999. His aim was to create a small \MTA\ that is especially focused on computers with dial-up Internet connections. Throughout the next four years he worked steadily on it, releasing new versions every few weeks. During the active phase of development 53 versions have been released. On average, this is a new version every 20 days. +\index{masqmail} +\index{dial-up} -This thesis is based on the latest release of \masqmail---version 0.2.21, dated November 2005\index{masqmail!latest release}. It was released after a 28 month gap of inactivity. The source code of 0.2.21 is the same as of 0.2.20, with only build documents modified. The homepage of \masqmail\ \citeweb{masqmail:homepage2}\index{masqmail!homepage} does not include this latest release, but it can be retrieved from the \name{Debian} package pool\index{debian!package pool}\footnote{The \NAME{URL} is:\\\url{http://ftp.de.debian.org/debian/pool/main/m/masqmail/masqmail_0.2.21.orig.tar.gz}} \citeweb{packages.debian}. +This thesis is based on the latest release of \masqmail---version 0.2.21, dated November 2005. It was released after a 28 month gap of inactivity. The source code of 0.2.21 is the same as of 0.2.20, with only build documents modified. 
The homepage of \masqmail\ \citeweb{masqmail:homepage2} does not include this latest release, but it can be retrieved from the \name{Debian} package pool\footnote{The \NAME{URL} is:\\\url{http://ftp.de.debian.org/debian/pool/main/m/masqmail/masqmail_0.2.21.orig.tar.gz}} \citeweb{packages.debian}. +\index{debian!package pool} -\masqmail\ is covered by the \name{General Public License}\index{gpl} (short: \NAME{GPL}) version two or any later version \cite{fsf:gpl}. This qualifies \masqmail\ as Free Software\index{free software} \cite{fsf:freesw-definition}. +\masqmail\ is covered by the \name{General Public License} (short: \NAME{GPL}) version two or any later version \cite{fsf:gpl}. This qualifies \masqmail\ as Free Software \cite{fsf:freesw-definition}. +\index{gpl} +\index{Free Software} \person{Kurth} abandoned \masqmail\ after 2005 and no one adopted the project since then. Thus, the author of this thesis decided to take over responsibility for \masqmail\ now. He received \person{Kurth}'s permission to do so in private telephone conversation with \person{Kurth} on September 4, 2008. -The program's new homepage\index{masqmail!homepage} \citeweb{masqmail:homepage} includes a collection of available information about this \MTA. +The program's new homepage \citeweb{masqmail:homepage} includes a collection of available information about this \MTA. @@ -103,7 +125,8 @@ \subsection{Target field} \label{sec:masqmail-target-field} -\person{Kurth}'s intention when creating \masqmail\ is best told in his own words:\index{masqmail!design intention} +\person{Kurth}'s intention when creating \masqmail\ is best told in his own words: +\index{masqmail} \begin{quote} MasqMail is a mail server designed for hosts that do not have a permanent internet connection eg. a home network or a single host at home. It has special support for connections to different \NAME{ISP}s. It replaces sendmail or other \MTA{}s such as qmail or exim. 
@@ -112,7 +135,8 @@ It is intended to cover a specific niche: non-permanent Internet connection and different \name{Internet Service Providers} (short: \NAME{ISP}s). -Although it can basically replace other \MTA{}s it is not \emph{generally} aimed to do so. The package description of \masqmail\ within \name{Debian} states this more clearly by changing the last sentence to:\index{debian!masqmail package} +Although it can basically replace other \MTA{}s it is not \emph{generally} aimed to do so. The package description of \masqmail\ within \name{Debian} states this more clearly by changing the last sentence to: +\index{debian!masqmail package} \begin{quote} In these cases, MasqMail is a slim replacement for full-blown \MTA{}s such as sendmail, exim, qmail or postfix. @@ -121,9 +145,12 @@ The program is a good replacement ``in these cases'' but not generally, since it lacks essential features for running on publically accessable mail servers. It is primarily not secure enough for being accessible from untrusted locations. -\masqmail\ is best used in home networks which are non-permanently connected to the Internet\index{non-permanent}. It is easy configurable for situations which are rarely solvable with the common \MTA{}s. Such include different handling of mail to local or remote destination and respecting different routes of online connection. These features are explained in more detail in section~\ref{sec:masqmail-features}. +\masqmail\ is best used in home networks which are non-permanently connected to the Internet. It is easily configurable for situations which are rarely solvable with the common \MTA{}s. Such include different handling of mail to local or remote destination and respecting different routes of online connection. These features are explained in more detail in section~\ref{sec:masqmail-features}. +\index{non-permanent online connection} -While many other \MTA{}s are general purpose \MTA{}s, \masqmail\ aims on special situations. Nevertheless, it can be used as general purpose \MTA, too. Especially this was a design goal of \masqmail: To be a replacement for \sendmail\ or similar \MTA{}s.\index{masqmail!sendmail replacement} +While many other \MTA{}s are general purpose \MTA{}s, \masqmail\ aims at special situations. 
Nevertheless, it can be used as general purpose \MTA, too. Especially this was a design goal of \masqmail: To be a replacement for \sendmail\ or similar \MTA{}s.\index{masqmail!sendmail replacement} +While many other \MTA{}s are general purpose \MTA{}s, \masqmail\ aims on special situations. Nevertheless, it can be used as general purpose \MTA, too. Especially this was a design goal of \masqmail: To be a replacement for \sendmail\ or similar \MTA{}s. +\index{masqmail} +\index{sendmail} \masqmail\ is designed to run on workstations and on servers in small networks, like they are common in \NAME{SOHO}s (\name{Small Offices/Home Offices}). @@ -131,7 +158,8 @@ \subsubsection*{Typical usage scenarios} -This section describes three common setups that make sensible use of \masqmail. The first two are shown in figure~\ref{fig:masqmail-typical-usage}.\index{masqmail!common setups} +This section describes three common setups that make sensible use of \masqmail. The first two are shown in figure~\ref{fig:masqmail-typical-usage}. +\index{masqmail!common setups} \begin{figure} \begin{center} @@ -146,8 +174,7 @@ \begin{description} \item[Scenario 1:] \label{scenario1} -If no server is present, every workstation would be equipped with \masqmail. Mail transfer within the same machine or within the local net works straight forward using direct transfer. Outgoing mail to the Internet is sent to an \name{Internet Service Provider} (short: \NAME{ISP}) for relaying whenever the router goes online. The configuration of \masqmail\ would be the same on every computer; only host names would differ. -To receive mail from the Internet requires a mailbox on the \NAME{ISP}'s mail server. Mail needs to be fetched from the \NAME{ISP}'s server onto the workstation using the \NAME{POP3} or \NAME{IMAP} protocol. +If no server is present, every workstation would be equipped with \masqmail. Mail transfer within the same machine or within the local net works straight forward using direct transfer. 
Outgoing mail to the Internet is sent to an \name{Internet Service Provider} (short: \NAME{ISP}) for relaying whenever the router goes online. The configuration of \masqmail\ would be the same on every computer; only host names would differ. To receive mail from the Internet requires a mailbox on the \NAME{ISP}'s mail server. Mail needs to be fetched from the \NAME{ISP}'s server onto the workstation using the \NAME{POP3} or \NAME{IMAP} protocol. \index{isp} \index{pop3} \index{imap} @@ -155,6 +182,7 @@ \item[Scenario 2:] \label{scenario2} In the same network but with a server, one could have \masqmail\ running on the server and using simple forwarders (see section~\ref{subsec:relay-only}) on the workstations to transfer mail to the server. The server would then, dependent on the destination of the message, deliver locally or relay to an \NAME{ISP}'s server for further relay. This setup does only support mail transfer to the server but not back to a workstation. However, this can be solved by mounting the user's mailbox from the server to the workstation or by using \NAME{POP3} or \NAME{IMAP}. Mail transfer from the \NAME{ISP} to the local server needs \NAME{POP3} or \NAME{IMAP} as well. +\index{relay-only mta} \index{isp} \index{pop3} \index{imap} @@ -163,12 +191,12 @@ \label{scenario3} A third scenario is unrelated as it is about notebooks. Notebooks are usually used as mobile workstations. One uses them to work at different locations. With the increasing popularity of wireless networks this becomes more and more common. Different networks demand for different setups: In one network it is best to send mail to an \NAME{ISP} for relay. In another network it might be preferred to use a local mail server. A third network may have no Internet access at all, hence using a local mail server is required. All these different setups can be configured once and then used by simply telling the online state to \masqmail, even automatically within a network setup script. 
\index{isp} -\index{notebook} +\index{masqmail!on notebooks} \end{description} In general, all kinds of usage scenarios within a trusted network are possible. Important to notice is that mail can not be sent from outside into the trusted network then. For using \masqmail\ on notebooks it is suggested to only accept mail from local users because notebooks are often in untrusted environments. -\index{untrusted environments} +\index{untrusted environment} @@ -221,7 +249,7 @@ \index{masqmail!dependencies} Some parts of \masqmail's functionality can be included or excluded at compile time by defining symbols. To enable maildir support for example, one has to add \verb_--enable-maildir_ to the configure call. Otherwise the concerning code gets removed during preprocessing. -\index{exclude code} +\index{conditional compilation} \index{maildir} With \masqmail\ comes the small tool \path{mservdetect}; it helps setting up a configuration that uses the \name{mserver} system for online state detection. Two other binaries get compiled for testing purposes: \path{readtest} and \path{smtpsend}. These three additional programs use parts of \masqmail's source code; they only add a file with a \verb+main()+ function each. 
@@ -234,7 +262,6 @@ \label{sec:masqmail-features} \masqmail\ supports two channels for incoming mail: -\index{masqmail!incoming channels} \begin{enumerate} \item Standard input which is used when \path{masqmail} (or the \path{sendmail} link) is executed on the command line @@ -247,7 +274,7 @@ \begin{enumerate} \item Direct delivery to local mailboxes (in \name{mbox} or \name{maildir} format) - \item Local pipes to pass mail to a program (e.g.\ to \MDA{}s or to gateways to \NAME{UUCP} or fax) + \item Local pipes to pass mail to a program (e.g.\ to \MDA{}s or to gateways to \NAME{FAX} or \NAME{UUCP}) \item \NAME{TCP} sockets to transfer mail to other \MTA{}s using the \SMTP\ protocol \end{enumerate} \index{tcp socket} @@ -257,6 +284,8 @@ \index{uucp} \index{fax} \index{gateway} +\index{mda} +\index{pipe} Figure~\ref{fig:masqmail-channels} shows this as a picture. (The ``online state'' input is explained a bit later.) @@ -266,10 +295,11 @@ \end{center} \caption{Incoming and outgoing channels of \masqmail} \label{fig:masqmail-channels} - \index{figure!incoming and outgoing channels of \masqmail} \end{figure} Outgoing \SMTP\ connections feature \SMTP-\NAME{AUTH} and \SMTP-after-\NAME{POP} authentication but incoming connections do not. Using wrappers for outgoing connections is supported. This allows encrypted communication through a gateway application like \name{openssl}. +\index{openssl} +\index{wrapper} \index{auth!smtp-auth} \index{auth!smtp-after-pop} @@ -278,8 +308,6 @@ The \masqmail\ executable can be called by various names for sendmail-com\-pa\-ti\-bi\-li\-ty reasons. As many programs expect the \MTA\ to be located at \path{/usr/lib/sendmail} or \path{/usr/sbin/sendmail}, symbolic links are pointing from there to the \masqmail\ executable. Furthermore does \sendmail\ support calling it with a different name instead of supplying command line arguments. 
The best known of these shortcuts is \path{mailq} which is equivalent to calling it with the argument \verb+-bq+. \masqmail\ recognizes the shortcuts \path{mailq}, \path{smtpd}, \path{mailrm}, \path{runq}, \path{rmail}, and \path{in.smtpd}. The first two are inspired by \sendmail. Not implemented yet is the shortcut \path{newaliases} because \masqmail\ does not generate binary representations of the alias file.\footnote{A shell script named \path{newaliases} that invokes \texttt{masqmail -bi} can provide the command to satisfy strict requirements.} \path{hoststat} and \path{purgestat} are missing for complete sendmail-compatibility. \index{sendmail!compatibility} -\index{symbolic link} -\index{shortcuts} Additional to the \MTA\ job, \masqmail\ also offers mail retrieval services by acting as a \NAME{POP3} client. It can fetch mail from different remote locations, also dependent on the active online connection. Such functionality is especially useful in a setup like \name{Scenario 2} on page~\pageref{scenario2}. \index{pop3} @@ -288,10 +316,10 @@ \subsubsection*{Online detection and online routes} \label{sec:masqmail-routes} -\index{masqmail!online routes} +\index{online routes} \masqmail\ focuses on handling different non-permanent online connections, thus a concept of online routes is used. One may configure any number of routes to send mail. Each route can have criteria to determine if some message is allowed to be sent over it. Mail to destinations outside the local network gets queued until a suitable online connections is available. -\index{non-permanent} +\index{non-permanent online connection} The idea behind this concept is sending mail to the Internet through the mail server of the same \NAME{ISP} over which one had dialed in. It was quite common that \NAME{ISP}s accepted mail for relay only if it came from a online connection they managed. 
This means, it was not possible to relay mail through the mail server of one \NAME{ISP} while being online through the connection of another \NAME{ISP}. \masqmail\ is a solution to the wish of switching the relaying mail server easily. \index{isp} @@ -322,17 +350,18 @@ \section{Why \masqmail\ is worth it} -\index{masqmail!reasons to revive} +\index{masqmail} First of all, \masqmail\ is better suited for its target field of operation (multiple non-permanent online connections) than every other \MTA. Especially is such usage easy to set up because \masqmail\ was designed for that. Many alternative \MTA{}s were not designed for those scenarios at all as the following two examples show: ``Exim is designed for use on a network where most messages can be delivered at the first attempt.'' \cite[page~30]{hazel01}. ``qmail was designed for well-connected hosts: those with high-speed, always-on network connectivity.'' \cite[page~9]{sill02}. -\index{non-permanent} +\index{non-permanent online connection} \index{qmail} \index{exim} %fixme: hikernet Additionally does \masqmail\ make it easy to run an \MTA\ on workstations or notebooks. There is no need to do complex configuration or to be a mail server expert. Only a handful of options need to be set; the host name, the local networks, and one route for relaying are sufficient in most times. -\index{notebook} +\index{masqmail!on notebooks} +\index{configuration} Probably users say it best; in this case \person{Derek Broughton}: \index{masqmail!users} @@ -351,6 +380,7 @@ \end{quote} Not to forget \masqmail's size. \masqmail\ is much smaller than full-blown \MTA{}s like \sendmail, \postfix, or \exim, and still smaller than \qmail. (See section~\ref{sec:mta-comparison} for details.) This makes \masqmail\ a good choice for workstations or even embedded computers.
+\index{qmail} Again words of a user who chose \masqmail\ as \MTA\ on his old laptop with a 75 megahertz processor and eight megabytes of \NAME{RAM}: @@ -359,15 +389,19 @@ \hfill\citeweb{stosberg:low-mem-laptop} \end{quote} \index{isp} -\index{notebook} +\index{sendmail} +\index{masqmail!on notebooks} Although the development on \masqmail\ has been stopped in 2003, \masqmail\ still has its users. Having users is already reason enough for further development and maintenance. This applies especially when the software covers a niche and when requirements for such software in general changed. Both is the case for \masqmail. It is difficult to get numbers about users of Free Software because no one needs to tell anyone when he uses some software. \name{Debian}'s \name{popcon} statistics \citeweb{popcon.debian} are a try to provided numbers. For January 2009, the statistics report 60 \masqmail\ installations of which 49 are in active use. If it is assumed that one third of all \name{Debian} users report their installed software\footnote{One third is a high guess as it means there would be only about 230 thousand \name{Debian} installations in total. But according to the \name{Linux Counter} \citeweb{counter.li.org} between 490 thousand and 12 million \name{Debian} users can be estimated.}, there would be in total around 150 active \masqmail\ installations in \name{Debian}. \name{Ubuntu} which also does \name{popcon} statistics \citeweb{popcon.ubuntu}, counts 82 installations with 13 active ones. If here also one third of all systems submit their data, 40 active installations can be added. Including a guessed amount of additional 30 installations on other Unix operating systems makes about 220 \masqmail\ installations in total. Of course one person may have \masqmail\ installed on more than one computer, but a total of 150 different users seems to be realistic. 
+\index{Free Software} \index{debian!popcon} \index{masqmail!users} +\index{Linux Counter} +\index{Unix} %The increasing number of systems using \masqmail, as it is shown on the \name{popcon} graph \citeweb{popcon.debian:masqmail}, seems to be impressive in the beginning as \masqmail\ was not developed during that time. But it might come from the increasing popularity of \name{popcon} over the time. @@ -388,7 +422,7 @@ \index{masqmail!problems} A program that is neglected for more than five years in a field of operation that changed during this time surely needs improvement. Security and spam have highly increased in importance since 2003. Dial-up connections became rare, instead broadband flat rates are common now. Other \MTA{}s evolved in respect to these changes---\masqmail\ did not. -\index{dial-up connections} +\index{dial-up} The current market situation and trends for the future need to be identified. Looks at other \MTA{}s need to be taken. Required work on \masqmail\ needs to be defined in combination with the evaluation of strategies to do this work. And a plan for further development should be created. diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/2-MarketAnalysis.tex --- a/thesis/tex/2-MarketAnalysis.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/2-MarketAnalysis.tex Sun Feb 08 22:51:42 2009 +0100 @@ -8,6 +8,7 @@ \section{Electronic communication technologies} Electronic communication is ``communication by computer'', according to the \name{WordNet} database of the \name{Princeton University} \citeweb{wordnet}. Mobile phones and fax machines should be seen as computers here, too.
The \name{Science Glossary} of the \name{Pennsylvania Department of Education} \citeweb{science-glossary-pa} describes electronic communication as ``System for the transmission of information using electronic technology (e.g., digital cameras, cellular telephones, Internet, television, fiber optics).'' +\index{fax} \index{electronic communication} Electronic communication needs no transport of tangible things, only electrons, photons, or radio waves need to be transmitted. Thus electronic communication is fast in general. With costs mainly for infrastructure and very low costs for data transmission is electronic communication also cheap communication. Primary the Internet is used as underlying transport infrastructure. Thus electronic communication is available nearly everywhere around the world. These properties---fast, cheap, available---make electronic communication well suited for long distance communication. @@ -20,7 +21,7 @@ \subsection{Classification} Electronic communication technologies can be divided in synchronous and asynchronous communication. Synchronous communication is direct dialog with little delay. Telephone conversation is an example. Asynchronous communication consists of independent messages. Dialogs are possible as well, but not in the same direct fashion. These two groups can also be split by the time which is needed for data delivery. Synchronous communication requires nearly real-time delivery, whereas for asynchronous communication message delivery times of several seconds or minutes are sufficient. -\index{electronic communication!classification of} +\index{electronic communication!classification} Another possible separation is to distinguish recorded and written information. Recorded information, like audio or video data, is accessible only in a linear way by spooling and replay. Written information, on the other hand, can be accessed in arbitrary sequence, detail, and speed. 
@@ -34,7 +35,6 @@ \end{center} \caption{Classification of electronic communication technologies} \label{fig:comm-classification} - \index{figure!Classification of electronic communication technologies} \end{figure} One might be surprised to find Instant \emph{Messaging} not in the group of \emph{message} communication. Instant Messaging could be put in both groups because it allows asynchronous communication additional to being a chat system. The reasons why it is classified as dialog communication are its primary use for dialog communication and the very fast---instant---delivery time. @@ -58,19 +58,17 @@ \end{center} \caption{Life cycle of electronic communication technologies} \label{fig:comm-lifecycle} - \index{figure!Life cycle of electronic communication technologies} \end{figure} Video messages and voice mail are technologies in the introduction phase. Voice over \NAME{IP} is heavily growing these days. Instant Messaging has reached maturation and is still growing. Email is an example for a technology in the saturation phase. Telefax, for instance, is a declining technology. +\index{fax} Email ranges in the saturation phase which is defined by a saturated market. No more products are needed: there is no more growth. This means, email is a technology which is used by everyone who wants to use it. It is a standard technology. The current form of email in the current market is on the top of its life cycle. The future is decline, sooner or later. But life cycle positions change as the subject or the market changes. An example is the \name{Flash} animation software \citeweb{flash:homepage}. The product's change from a drawing and animation system to a technology for website creation, advertising, and movie distribution, and the thus changing target market, made it slip back on the life cycle. If the email system would evolve to become the basis for Unified Messaging (see section~\ref{sec:unified-messaging}), a similar slip back would be the consequence.
-\index{flash} \index{um} The \NAME{DVD} standards \NAME{DVD+} and \NAME{DVD$-$} are an example for a changing market. With the upcoming next generation formats \name{Blu-ray Disc} \citeweb{wikipedia:bluray} and \NAME{HD-DVD} \citeweb{wikipedia:hddvd}, a much sooner decline of \NAME{DVD+} and \NAME{DVD$-$} started, even before they reached their last improvement steps in storage size. Such can happen to email too, if Unified Messaging is a revolution to the email system instead of an evolution. -\index{dvd} \index{um} @@ -87,7 +85,7 @@ \index{um} Today email still is the major asynchronous communication technology and it probably will be it for the next years. Unified Messaging needs similar transfer facilities as email, thus it seems to be rather an evolution to the current technology than a revolution. Hence \MTA{}s will still be of importance in future, though maybe in a modified form. -\index{mta!future importance of} +\index{mta} \subsubsection*{Integration} @@ -132,6 +130,7 @@ Unified messaging is a nice idea, but a tough sell: The reason you bought a cell phone, a pager, and a fax/modem is because each does its job well. No one wants to download voice mail as a series of RealAudio messages or sit through a voice mail bot spelling out email, complete with `semicolon dash end-parenthesis' for ;-). \hfill\cite{wired:hype} \end{quote} +\index{fax} @@ -166,7 +165,7 @@ \end{quote} The amount of spam is huge. Panda Security and Commtouch write in their \name{Email Threats Trend Report} for the second Quarter of 2008: ``Spam levels throughout the second quarter averaged 77\,\%, ranging from a low of 64\,\% to a peak of 94\,\% of all email [...]'' \cite[page 4]{panda:email-threats}. The report sees the main source of spam in bot nets consisting of zombie computers: ``Spam and malware levels remain high for yet another quarter, powered by the brawny yet agile networks of zombie \NAME{IP}s.'' \cite[page 1]{panda:email-threats}. 
This is supported by IronPort Systems: ``More than 80 percent of spam now comes from a `zombie'---an infected \NAME{PC}, typically in a consumer broadband network, that has been hijacked by spammers.'' \cite{ironport:zombie-computers}. Positive for \MTA{}s is that they are not the main source for spam, but it is only a small delight. Spam is a general weakness of the email system because it is not stoppable. -\index{spam!sources of} +\index{spam!sources} @@ -175,8 +174,9 @@ \subsubsection*{Opportunities} Opportunities of the market are large data transfers, originating in multimedia content, which becomes popular. If email is used as basis for Unified Messaging, lots of voice and video mail will be transferred. Email is weak related to this kind of data: The data needs to be encoded to \NAME{ASCII} which stresses mail servers a lot. Additionally a lot of traffic is generated by the \name{store-and-forward} transfer, which \SMTP\ uses. +\index{ascii} \index{um} -\index{store-and-forward} +\index{smtp!store-and-forward} The use of different hardware to access mail is another opportunity of the market. But as more hardware gets involved, the networks become more complex. Thus the need for more software and infrastructure to transfer mail within the growing network might be a weakness of the email system. @@ -186,6 +186,7 @@ The increasing integration of communication channels is an opportunity for the market. But deciding whether it is a weakness or strength of email is difficult. Due to the impossibility to integrate synchronous stream data and large binary data, it is a weakness. But it is also a strength, because arbitrary asynchronous communication data already can be integrated. On the other hand, the integration might be a threat too, because integration often leads to complexity of software. Complex software is more error prone and thus less reliable. This, however, could again be a strength of electronic mail because its modular design decreases complexity. 
Figure~\ref{fig:email-swot} displays the \NAME{SWOT} analysis in a handy overview. It is obvious to see, that the opportunities outweigh. This is an indicator for a still increasing market. %fixme: ref +\index{swot analysis} \begin{figure} \begin{center} @@ -193,7 +194,6 @@ \end{center} \caption{\NAME{SWOT} analysis for email} \label{fig:email-swot} - \index{figure:\NAME{SWOT} analysis for email} \end{figure} @@ -234,8 +234,8 @@ \index{dial-up} \index{isp} -Nowadays, dial-up Internet access became rare; the majority of the users has broadband Internet access. As a flat rate is payed for it, the time being online does not affect costs anymore, even traffic is unlimited. Today it is possible to have an own mail server running at home. The remaining technical problem is the changing \NAME{IP} addresses one gets assigned every 24 hours\footnote{This, at least, is the situation in Germany.}. But this is solvable with one of the dynamic \NAME{DNS} services; they provide the mapping of a fixed domain name to the changing \NAME{IP} addresses. -\index{changing ip addresses} +Nowadays, dial-up Internet access became rare; the majority of the users has broadband Internet access. As a flat rate is payed for it, the time being online does not affect costs anymore, even traffic is unlimited. Today it is possible to have an own mail server running at home. The remaining technical problem is the changing \NAME{IP} addresses one gets assigned every 24 hours\footnote{At least this is the situation in Germany.}. But this is solvable with one of the dynamic \NAME{DNS} services; they provide the mapping of a fixed domain name to the changing \NAME{IP} addresses. +\index{dynamic dns} Home servers become popular for central data storage and multimedia services, these days. Being assembled of energy efficient hardware, power consumption is no big problem anymore. These home servers will replace video recorders and \NAME{CD} music collections in the near future. 
It is also realistic that they will manage heating systems and intercoms, too. Given the future leads to this direction, it will be a logical step to have email and other communication provided by the own home server as well. \index{home server} @@ -247,10 +247,12 @@ \index{push email} The retrieval of email is a field that is also about to change these days. The old way is to fetch email by polling the server that holds the personal mailbox. This polling is normally done in regular intervals, often once every five to thirty minutes. The mail transfer from the mailbox to the \MUA\ is initiated from the user side. The disadvantage herewith is the delay between the arrival of mail on the server and the time when the user finally has the message on his screen. +\index{mua} To remove this disadvantage, \name{push email} \citeweb{pushemail.co.uk} was invented. Here the server is not polled every few minutes about new mail, but the server pushes new mail directly to the client on arrival. The transfer is initiated by the server. This concept became popular with smart phones; they were able to do emailing but the traffic caused by polling the server was expensive. The concept works well with mobile phones where the provider knows about the client, but it does not seem to be a choice for computers, since the provider needs to have some kind of login to push data to the user's computer. Push email, however, could swap over to computers when using a home server and no external provider. A possible scenario is a home server which receives mail from the Internet and pushing it to own workstations and smart phones. The configuration could be done by the user by using some simple interface, like one configures his telephone system to have different telephone numbers ringing on specified phones. +\index{configuration} Another problem is when multiple clients share one mailbox. 
This is only solvable by working directly in the server's mailbox, which causes lots of traffic, or by storing at least information about read messages and the like there. @@ -262,6 +264,7 @@ As main change, the sender has the responsibility for mail storage; only a notification about a mail message gets sent to the recipient. The recipient can then fetch the message from the sender's server. This is in contrast to the \SMTP\ mail architecture where mail and the responsibility for it is transferred from the sender to the receiver. (See page~\pageref{smtp-intro} for the \name{store-and-forward} principle.) \index{smtp!store-and-forward} +\index{smtp!responsibility} \MTA{}s are still important in this new email architecture, but in a slightly different way. They do not transfer mail itself anymore, but they transport the notifications about new mail to the destinations. This is a quite similar job as in the \NAME{SMTP} model. The real transfer of the mail, however, can be done in an arbitrary way, for example via \NAME{FTP} or \NAME{SCP}. @@ -269,6 +272,7 @@ \index{Guarded Email} \name{Hashcash} by \person{Adam Back}---a third concept---tries to limit spam and denial of service attacks \cite{back02}. It requests payment for email. The costs are computing time for the generation of hash values. Thus sending spam becomes expensive. Further information about \name{Hashcash} can be found on \citeweb{hashcash:homepage}. +\index{denial of service attack} \index{Hashcash} New concepts, like the ones presented here, are invented to remove problems of the email technology. \name{Internet Mail 2000}, for instance, removes the spam problem and the problem of large message transfers. @@ -283,10 +287,12 @@ \paragraph{Easy configuration} Provider independence through running an own mail server at home asks for easy configuration of the \MTA. Providers have specialists to configure the systems, but ordinary people do not.
Solutions are either having some home service system for computer configuration established with specialists coming to ones home to set up the systems; like it is already common for problems with the power and water supply systems. Or configuration needs to be easy and fool-proof, so it can be done by the owner himself. The latter solution depends on standardized parts that fit together seamlessly. The technology must not be a problem itself. Only settings that are custom to the users environment should be left open for him to set. This of course needs to be doable using a simple configuration interface like a web interface. Non-technical educated users should be able to configure the system. -\index{easy configuration} +\index{configuration} Complex configuration itself is not a problem if simplification wrappers provide an easy interface. The approach of wrappers to make it look easier to the outside is a good concept in general. It still lets the specialist do complex and detailed configuration while also a simple configuration interface to novices is offered. \sendmail\ took this approach with the \name{m4} macros \cite{sendmail:config}. Further more is this approach well suited to provide various wrappers with different user interfaces (e.g.\ graphical programs, websites, command line programs; all of them either in a questionnaire style or interactive). -\index{sendmail!m4 macros} +\index{sendmail} +\index{wrapper} +\index{m4 macros} \paragraph{Performance} When \MTA{}s become popular on home servers and maybe even on workstations and smart phones, then performance will be less important. Providers need \MTA{}s that process large amounts of mail in short time. There is no need for home servers and workstations to handle that much mail; they need to process far less email messages per time unit. Thus performance will probably not be a main requirement for an \MTA\ in future, given they mainly run on private machines. 
@@ -294,6 +300,7 @@ \paragraph{Flexibility} New mailing concepts and architectures like push email or \name{Internet Mail 2000} will, if they succeed, require \MTA{}s to adopt the new technology. \MTA{}s that are not able to change are going to be sorted out by evolution. Thus it is important \emph{not} to focus too much on one use case, but to stay flexible. \person{Allman} saw the flexibility of \sendmail\ one reason for its huge success (see section~\ref{sec:sendmail}). +\index{sendmail} \index{flexibility} \paragraph{Security} @@ -334,6 +341,7 @@ \paragraph{\NAME{SWOT} analysis} Not only the market influences email's future safety, but also must the email technology itself evolve to satisfy upcoming needs. Actions to take were discovered by using the \NAME{SWOT} analysis. These are: Prepare against spam. Search solutions for large data transfers and increasing growth and ramification of networks. Exploit standardization, modularity, and extendability. +\index{swot analysis} \paragraph{Trends} Also needed is awareness for new trends like: Provider independence, new delivery concepts, and completely new emailing concepts that introduce new protocols. Easy configuration, as well as the somehow opposed flexibility, will be important, but not performance. Security will be essential. @@ -342,6 +350,7 @@ What kinds of \MTA{}s will be needed in future? Probably ones running on home servers and workstations. This is what \masqmail\ was designed for. The dial-up Internet connections, which are central to \masqmail's design, become rare, but mobile clients that move between different networks do need similar concepts, too. This makes \masqmail\ still be a good \MTA\ for such usage. Additionally, \masqmail\ is small and it is much easier to configure for setups that are common to workstations and home servers, than other \MTA{}s. 
\MTA{}s might become more commodity software, like web servers already are today, with the purpose to be included in many systems with only minimal configuration. +\index{configuration} \masqmail\ is a valuable program for various situations. Some setups became rare, but others are expected to become popular in the next years. It is expected that \masqmail's niche will rather grow than shrink. diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/3-MailTransferAgents.tex --- a/thesis/tex/3-MailTransferAgents.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/3-MailTransferAgents.tex Sun Feb 08 22:51:42 2009 +0100 @@ -38,7 +38,6 @@ \label{subsec:relay-only} Also called \name{forwarders}. This is the most simple kind of an \MTA. It transfers mail only to defined \name{smart hosts}\footnote{\name{smart host}s are mail servers that receive email and route it to the actual destination.}. Relay-only \MTA{}s do not receive mail from outside the system and they do not deliver locally. All they do is transfer mail to a specified smart host for further relay. -\index{forwarder} \index{relay-only mta} \index{smart host} @@ -60,13 +59,17 @@ \subsubsection*{``Real'' MTAs} -\index{real mta} +\index{mta!real ones} There is a third type of \MTA{}s in between the minimalistic \name{relay-only} \MTA{}s and the feature loaded \name{groupware}. Those programs may be named ``real \MTA{}s'', or ``proper \MTA{}s'', though there is no common name. They are what is meant with the term ``mail transfer agent''---programs that transfer mail between hosts. Common to them is their focus on the email transfer, while they are able to act as smart hosts. Their variety ranges from ones mostly restricted to mail transfer (e.g.\ \qmail) to others having interfaces for adding further mail processing modules (e.g.\ \postfix). This group covers everything in between the other two groups. \name{Real} \MTA{}s include \sendmail, \exim, \qmail, and \postfix. 
+\index{sendmail} +\index{postfix} +\index{qmail} +\index{exim} \subsubsection*{Other segmenting} @@ -75,9 +78,11 @@ Due to \sendmail's significance in the early times of email, compatibility interfaces to \sendmail\ are important for Unix \MTA{}s. The reason is that many mail applications simply assume the \sendmail\ \MTA\ to be installed on the system. Being not \name{sendmail-compatible} may not matter for some fields of action, but makes the program ineligible for serving as a general purpose \MTA\ on Unix systems. Hence being sendmail-compatible is a major property of an \MTA. \MTA{}s without \name{sendmail-compatible} interfaces, or at least compatibility add-ons, will not be covered here. One example for such a program is \name{Apache James}. \index{sendmail!compatibility} +\index{Unix} Another separation can be done between Free Software \MTA{}s and proprietary ones. Many of the \MTA{}s for Unix systems are Free Software. Only these are regarded throughout this thesis, because comparing Free Software with proprietary or commercial software is not what typical users of programs like \masqmail\ do. Comparison with non-free programs may be a point for large Free Software projects that try to step into the business world. Small projects, mostly used by individuals at home, need to be compared against other projects of similar shape. The document is seen from \masqmail's point of view---an \MTA\ for Unix systems on home servers and workstations---so non-free software is out of the way. -\index{freesw} +\index{Free Software} +\index{Free Software projects} @@ -85,7 +90,7 @@ \subsubsection*{\masqmail's position} -\index{masqmail!position of} +\index{masqmail!position} Now, where does \masqmail\ fit in? It is not groupware nor a simple forwarder, thus it belongs to the ``real \MTA{}s''. Additionally, it is Free Software and is sendmail-compatible to a large degree. This makes it similar to \sendmail, \exim, \qmail, and \postfix. 
\masqmail\ is intended to be a replacement for those \MTA{}s. @@ -105,11 +110,15 @@ This section introduces a selection of popular \MTA{}s; they are the most likely substitutes for \masqmail. All are sendmail-compatible ``smart'' Free Software \MTA{}s that focus on mail transfer, as is \masqmail. The programs chosen to be compared are: \sendmail, \exim, \qmail, and \postfix. They are the most important representatives of the regarded group. +\index{sendmail} +\index{postfix} +\index{qmail} +\index{exim} \subsection{Market share analysis} \label{sec:market-share} -\index{mta!market share analysis} +\index{mta!market share} \MTA\ statistics are rare, differ, and good data is hard to collect. These points are bad if good statistics are wanted. Thus it is obvious there are only few available. @@ -120,15 +129,22 @@ \input{tbl/mta-market-share.tbl} \end{center} \caption{Market share of \MTA{}s} - \index{table!Market share of \MTA{}s} \label{tab:mta-market-share} \end{table} All surveys show high market shares for the four \MTA{}s: \sendmail, \exim, \qmail, and \postfix. Only the \name{Microsoft} mail server software and \name{IMail} have comparable large shares. Other Free Software \MTA{}s (\name{smail}, \name{zmailer}, \NAME{MMDF}, \name{courier-mta}) are less important and seldom used. +\index{mmdf} +\index{smail} +\index{zmailer} +\index{courier-mta} The three surveys base on different data. \person{Bernstein} took 1\,000\,000 randomly chosen \NAME{IP} addresses, containing 39\,206 valid hosts; 958 of them accepted \NAME{SMTP} connections. The \name{O'ReillyNet} survey used only domains owned by companies; in total 400\,000 hosts. \name{MailRadar} scanned 2\,818\,895 servers, leading to 59\,209 accepted connections. All surveys show \sendmail\ to be the most popular \MTA. \postfix, \qmail, and \exim\ are among the top six in each. \exim\ has slightly smaller shares than the other two. 
The four programs together share more than half of the market according to \person{Bernstein} and the \name{MailRadar} statistics. \name{O'ReillyNet} has their share to be somewhere between a third and the half. This uncertainty comes from the large amount of unidentifiable \MTA{}s. +\index{sendmail} +\index{postfix} +\index{qmail} +\index{exim} The 22 percent of \name{mail security layers} in the \name{O'ReillyNet} survey is remarkable. Mail security layers are software guards between the network and the \MTA\ that filter unwanted mail before it reaches the \MTA. This increases security by filtering malicious content and by blocking attacks against the \MTA. The large share here may be a result of only regarding business mail servers. The problem concerning the survey is the disguise of the \MTA{}s that run behind the security layer. It seems wrong to assume equal shares for the \MTA{}s behind the guards as for the unguarded \MTA{}s, because mail security layers will be more often used to guard weak \MTA{}s, as strong ones do not need them so much. This needs to be kept in mind when looking at the \name{O'ReillyNet} survey. \index{mail security layer} @@ -137,6 +153,7 @@ \subsection{The four major Free Software MTAs} +\index{Free Software} Now follows a small introduction to the four programs chosen for comparison. \masqmail\ is not presented here as it was already introduced in chapter~\ref{chap:introduction}. Longer introductions, including analysis and comparison, were written by \person{Jonathan de Boyne Pollard} \cite{jdebp}. @@ -147,8 +164,10 @@ \index{sendmail} \sendmail\ is the best known \MTA, since it was one of the first and surely the one that made \MTA{}s popular. It also was shipped as default \MTA{}s by many Unix system vendors \citeweb{wikipedia:sendmail}. +\index{Unix} The program was written by \person{Eric Allman} as the successor of his program \name{delivermail}. \person{Allman} was not the only one who was working on the program. 
Other people developed own versions of it and a variety of flavors came up, especially in the late eighties when Allman was inactive \cite[page~5]{vixie01}. +\index{delivermail} \sendmail\ is designed to transfer mails between different protocols and networks, this lead to a very flexible, though complex, configuration. @@ -157,11 +176,10 @@ %fixme: write about its importance and about sendmail-compat Further development will go into the project \name{MeTA1} which succeeds \sendmail. The former name of this new project was \name{sendmail~X}. -\index{meta1} -\index{sendmailx} +\index{sendmail!meta1} +\index{sendmail!sendmailx} More information can be found on the \sendmail\ homepage \citeweb{sendmail:homepage} and in the, so called, \name{Bat Book} \cite{costales97}. -\index{sendmail!homepage} @@ -170,6 +188,7 @@ \index{exim} \exim\ was started in 1995 by \person{Philip Hazel} at the \name{University of Cambridge}. It is a fork of \name{smail-3}, and inherited the monolithic architecture which is similar to \sendmail's. But having no architecture-given separation of the individual components of the system did not hurt. Its security is quite good \cite{blanco05}. +\index{smail} \exim\ is highly configurable, especially in the field of mail policies. This makes it easy to specify how mail is routed through the system and who is allowed to send email to whom. Interfaces to integrate spam and malware checkers are provided by design, too. @@ -177,7 +196,6 @@ \index{gpl} One finds \exim\ on its homepage \citeweb{exim:homepage}. The standard literature is \person{Hazel}'s \exim\ book \cite{hazel01}. -\index{exim!homepage} @@ -186,17 +204,18 @@ \index{qmail} \qmail\ is seen by its community as ``a modern \SMTP\ server which makes sendmail obsolete'' \citeweb{qmail:homepage2}. It was written by \person{Daniel~J.\ Bernstein}, starting in 1995. His primary goal was to create a secure \MTA\ to replace the popular, but vulnerable, \sendmail. 
His own words are: ``This is why I started writing qmail: I was sick of the security holes in sendmail and other \MTA{}s.'' \citeweb{qmail:homepage1}. +\index{sendmail} \qmail\ first introduced many innovative concepts in \MTA\ design. The most obvious contrast to \sendmail\ and \exim\ is its modular design. But \qmail\ was not the first modular \MTA. \NAME{MMDF}, which predates even \sendmail, was modular, too. Regardless of \NAME{MMDF}'s modular architecture, \qmail\ is generally seen as the first security-aware \MTA\ \citeweb{wikipedia:qmail}. +\index{exim} +\index{mmdf} The latest release of \qmail\ is version 1.03 from July 1998. Afterwards, in November 2007, \qmail's source was put into the \name{public domain}. This made it Free Software. -\index{public domain} Because of \person{Bernstein}'s inactivity, though the requirements changed since 1998, ``[a] motley krewe of qmail contributors (see the \NAME{README}) has put together a netqmail-1.06 distribution of qmail. It is derived from Daniel Bernstein's qmail-1.03 plus bug fixes, a few feature enhancements, and some documentation.'' \citeweb{netqmail:homepage}. -\index{netqmail} +\index{qmail!netqmail} \qmail's homepages are \citeweb{qmail:homepage1} and \citeweb{qmail:homepage2}. The best book about \qmail, from \person{Bernstein}'s view, is \person{Dave Sill}'s handbook \cite{sill02}. His free available guide ``Life with qmail'' is another valuable source \cite{lifewithqmail}. -\index{qmail!homepage} @@ -205,13 +224,14 @@ \index{postfix} The \postfix\ project started in 1999 at \NAME{IBM} \name{research}, then called \name{VMailer} or \NAME{IBM} \name{Secure Mailer}. \person{Wietse Venema}'s program ``attempts to be fast, easy to administer, and secure. The outside has a definite Sendmail-ish flavor, but the inside is completely different.'' \citeweb{postfix:homepage}. In fact, \postfix\ was mainly designed after qmail's architecture to gain security. 
But in contrast to \qmail\ it aims much more on being fast and full-featured. +\index{qmail} Today \postfix\ is taken by many Unix systems and \NAME{GNU}/Linux distributions as default \MTA. +\index{Unix} The latest stable version is numbered 2.5.6 from December 2008. \postfix\ is covered by the \NAME{IBM} \name{Public License 1.0} which is a Free Software license. Additional information can be retrieved from the program's homepage \citeweb{postfix:homepage}. \person{Dent}'s \postfix\ book \cite{dent04} claims to be ``the definitive guide'', and it is. -\index{postfix!homepage} @@ -225,13 +245,13 @@ This section does not try to provide a throughout \MTA\ comparison, because this is already done by others. Remarkable comparisons are the one by \person{Dan Shearer} \cite{shearer06} and a discussion on the mailing list \name{plug@lists.q-linux.com} \cite{plug:mtas}. Tabular overviews may be found at \citeweb{mailsoftware42}, \citeweb{wikipedia:comparison-of-mail-servers}, and \cite[section 1.9]{lifewithqmail}. Here provided is an overview on important properties of the four previously introduced \MTA{}s. The data comes from the above stated sources and is collected in table~\ref{tab:mta-comparison}\footnote{The lines of code were measured with \person{David~A.\ Wheeler}'s \name{sloccount} \citeweb{sloccount}.}. +\index{lines of code} \begin{table} \begin{center} \input{tbl/mta-comparison.tbl} \end{center} \caption{Comparison of \MTA{}s} - \index{table!Comparison of \MTA{}s} \label{tab:mta-comparison} \end{table} @@ -244,13 +264,25 @@ Two different architecture types show off: monolithic and modular \MTA{}s. Monolithic \MTA{}s are \sendmail, \name{smail}, \exim, and \masqmail. They all consist of one single \emph{setuid root}\footnote{\emph{setuid} lets a program run with the rights of its owner, here root. This is considered to be a security risk. Thus it it should be avoided if possible.} binary which does all the work. 
+\index{root privilege} +\index{setuid} +\index{sendmail} +\index{exim} +\index{smail} Modular \MTA{}s are \NAME{MMDF}, \qmail, \postfix, and \name{MeTA1}. They consist of several programs, each doing only a part of the overall job. The different programs run with the least permissions they need, \emph{setuid root} can be avoided completely. +\index{postfix} +\index{qmail} +\index{mmdf} +\index{sendmail!meta1} The architecture does not directly define the program's security, but ``[t]he goal of making a software secure can be better achieved by making the design simple and easier to understand and verify'' \cite[chapter~6]{hafiz05}. \exim, though being monolithic, has a fairly clean security record. But it is very hard to keep the security up as the program grows. \person{Wietse Venema} (the author of \postfix) says, it was the architecture that enabled \postfix\ to grow without running into security problems \cite[page 13]{venema:postfix-growth}. \index{security} +\index{postfix} +\index{exim} The modular design, with each sub-program doing one part of the overall job, conforms to the \name{Unix Philosophy}. The Unix Philosophy \cite{gancarz95} demands ``small is beautiful'' and ``make each program do one thing well''. Monolithic \MTA{}s fail here. +\index{Unix!philosophy} Today modular \MTA\ architectures are the state-of-the-art. @@ -261,7 +293,11 @@ Spam and malware increased during the last years. Today it is important for an \MTA\ to be able to provide checking for bad mail. This can be done by implementing functionality into the \MTA\ or by invoking external programs to do this job. \sendmail\ invented \name{milter}\footnote{``milter'' is a common abbreviation for ``sendmail mail filter \NAME{API}''.}, which is used to interface external programs of various kinds. \postfix\ adopted the \name{milter} interface but is also able to easily include scanning modules into its modular structure.
\qmail\ is pretty old and did not evolve with the changing market situation. Anyhow, its modular structure enables external scanners to be included into \qmail. \exim\ has the advantage that it was designed with the goal to provide extensive scanning facilities; it is therefore very good suited to scan itself or invoke external scanners. +\index{sendmail} +\index{postfix} +\index{qmail} \index{milter} +\index{exim} \subsubsection*{Future trends} @@ -271,14 +307,28 @@ \paragraph{Provider independence} The first trend was provider independence, which requires easy configuration. \postfix\ seems to do best here. It uses primary two configuration files (\path{master.cf} and \path{main.cf}) which are easy to manage. \sendmail\ appears to have a bad position. Its configuration file \path{sendmail.cf} is cryptic and very complex (it has legendary Turing-completeness) thus it needs simplification wrappers around it to provide easier configuration. They exist in form of the \name{m4} macros that generate the \path{sendmail.cf} file. Unfortunately, adjusting the generated result by hand appears to be necessary for non-trivial configurations. \qmail's configuration files are simple but the whole system is complex to set up; it requires various system users and \qmail\ is hardly usable without applying several patches that add functionality which is required nowadays. \name{netqmail} is the community's effort to help in the latter point. \exim\ has only one single configuration file (\path{exim.conf}) which suffers most from its flexibility---like in \sendmail's case. Flexibility and easy configuration are almost always contrary goals. +\index{sendmail} +\index{postfix} +\index{qmail} +\index{wrapper} +\index{exim} +\index{configuration} +\index{m4 macros} \paragraph{Performance} \index{performance} As second trend was the decreasing necessity for high per\-for\-mance identified. This goes along with the move of \MTA{}s from service providers to home servers. 
\postfix\ focuses much on performance, this might not be an important point in the future. Of course there will still be the need for high performance \MTA{}s, but a growing share of the market will not require high performance. Energy and space efficiency is related to performance; it is a similar goal in a different direction. But optimization, be it for performance or other efficiencies, is often in contrast to simplicity and clarity; these two improve security. Optimizing does in most times decrease the simplicity and clarity. Simple \MTA{}s that do not aim for high performance are what is needed in future. The simple design of \qmail\footnote{\qmail\ is still fast} is a good example. +\index{postfix} +\index{qmail} \paragraph{Security} \index{security} The third trend (even more security awareness) is addressed by each of the four programs. It seems as if all widely used \MTA{}s provide good security nowadays. Even \sendmail\ can be configured to be secure today. However, the modular architecture, used by \qmail\ and \postfix, is generally seen to be conceptually more secure. \sendmail's creators have started \name{MeTA1}, a modular \MTA\ that merges the best of \qmail\ and \postfix, to replace the old \sendmail. It will be interesting to watch \exim's future---will it become modular, too? +\index{sendmail} +\index{postfix} +\index{qmail} +\index{sendmail!meta1} +\index{exim} diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/4-MasqmailsFuture.tex --- a/thesis/tex/4-MasqmailsFuture.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/4-MasqmailsFuture.tex Sun Feb 08 22:51:42 2009 +0100 @@ -5,7 +5,7 @@ \section{The goal} -\index{development goal} +\index{development!goal} Before requirements can be identified and further development can be discussed, it is important to clearly specify the goal to achieve. This means: What shall \masqmail\ be like in, for instance, five years? 
\index{masqmail!in five years} @@ -18,7 +18,7 @@ \masqmail\ was intended to be a small ``real'' \MTA\ which covers the niche of managing the relay over several smart hosts. Small and resource friendly software is still important for workstations, home servers, and especially for embedded computers. Other software that focuses on the same niche is not known. Dial-up connections have become rare but mobile computers that move between different networks are popular. So, the niche is still present. What has changed in general is the security that is needed for software. \person{Graff} and \person{van Wyk} describe the situation well: ``[I]n today's world, your software is likely to have to operate in a very hostile security environment.'' \cite[page~33]{graff03}. Additionally they say: ``By definition, mail software processes information from potentially untrusted sources. Therefore, mail software must be written with great care, even when it runs with user privileges and even when it does not talk directly to a network.'' \cite[page~90]{graff03}. As \masqmail\ is mail software and trusted environments become rare, it is best for \masqmail\ to become a secure \MTA. -\index{hostile environment} +\index{untrusted environment} \index{security} In summary, the goal for \masqmail\ is to stay in the current niche with respect to modern usage scenarios and to become a secure \MTA. @@ -55,9 +55,13 @@ Outgoing mail is commonly either sent using \SMTP, piped into local commands (for example \path{uucp}), or delivered locally by appending to a mailbox. Outgoing channels are similar for \qmail, \postfix, and \name{sendmail~X}: All of them have a module to send mail using \SMTP, and one for writing into a local mailbox. \index{outgoing channels} \index{uucp} +\index{sendmail!sendmailx} +\index{pipe} Local mail delivery is a job that uses root privilege to be able to switch to any user in order to write to his mailbox. 
It is possible to deliver without being root privilege, but delivery to user's home folders is not generally possible then. Thus even the modular \MTA{}s \qmail\ and \postfix\ use root privilege for this job. As mail delivery to local users is \emph{not} included in the basic job of an \MTA{} and introduces a lot of new complexity, why should the \MTA\ bother? In order to keep the system simple, reduce privilege, and to have programs that do one job well, the local delivery job should be handed over to a specialist: the \NAME{MDA}. \NAME{MDA}s know about the various mailbox formats and are aware of the problems of concurrent write access and the like. Hence passing the message, and the responsibility for it, over to an \NAME{MDA} seems to be best. \index{local delivery} +\index{mda} +\index{root privilege} This means an outgoing connection that pipes mail into local commands is required. To other outgoing channels applies what was already said about incoming channels. @@ -66,7 +70,6 @@ \includegraphics[scale=0.75]{fig/mta-channels.eps} \end{center} \caption{Required incoming and outgoing channels} - \index{figure!Required incoming and outgoing channels} \label{fig:mta-channels} \end{figure} @@ -81,20 +84,22 @@ \label{rf2} \index{mail queue} Mail queuing removes the need to deliver instantly as a message is received. The queue provides fail-safe storage of mails until they are delivered. Mail queues are probably used in all \MTA{}s, even in some simple forwarders. The mail queue is essential for \masqmail, as \masqmail\ is intended for non-permanent online connections. This means, mail must be queued until a online connection is available to send the message. This may be after a reboot. Hence the mail queue must provide persistence. -\index{forwarder} -\index{non-permanent} +\index{relay-only mta} +\index{non-permanent online connection} -The mail queue and the module(s) to manage it are the central part of the whole system. 
This demands especially for robustness and reliability, as a failure here can lead to mail loss. An \MTA\ takes over responsibility for mail by accepting it, hence loosing mail messages is absolutely to avoid. This covers any kind of crash situation, too. The worst thing acceptable to happen is an already sent mail to be sent again. +The mail queue and the module(s) to manage it are the central part of the whole system. This demands especially for robustness and reliability, as a failure here can lead to mail loss. An \MTA\ takes over responsibility for mail by accepting it, hence losing mail messages is absolutely to avoid. This covers any kind of crash situation, too. The worst thing acceptable to happen is an already sent mail to be sent again. +\index{smtp!responsibility} \index{reliability} +\index{mail loss} \paragraph{\RF\,3: Header sanitizing} \label{rf3} -\index{header sanitizing} +\index{mail sanitizing} Mail coming into the system often lacks important header lines. At least the required ones must be added by the \MTA. One example is the \texttt{Date:} header, another is the, not required but recommended, \texttt{Message-ID:} header. Apart from adding missing headers, rewriting headers is important, too. Changing the locally known domain part of email addresses to globally known ones is an example. \masqmail\ needs to be able to rewrite the domain part dependent on the route used to send the message, to prevent messages to get classified as spam. -\index{masqmail!online routes} +\index{online routes} Generating the envelope is a related job. The envelope specifies the actual recipient of the mail, no matter what the \texttt{To:}, \texttt{Cc:}, and \texttt{Bcc:} headers contain. Multiple recipients lead to multiple different envelopes, all containing the same mail message. @@ -103,7 +108,7 @@ \paragraph{\RF\,4: Aliasing} \label{rf4} -\index{aliases} +\index{alias expansion} Email addresses can have aliases, thus they need to be expanded. 
Aliases can be of different kinds: another local user, a remote user, a list of local and remote users, or a command. Most important are the aliases in the \path{aliases} file, usually located at \path{/etc/aliases}. Addresses expanding to lists of users lead to more envelopes. Aliases changing the recipient's domain part may require a different route to be used. @@ -125,7 +130,8 @@ \index{open relay} \index{spam} -Several ways to restrict access are available. The most simple one is restriction by the \NAME{IP} address. No extra complexity is added this way but the \NAME{IP} addresses need to be static or within known ranges. This approach is often used to allow relaying for local nets. The access check can be done by the \MTA\ or by a guard (e.g.\ \NAME{TCP} \name{Wrappers} \cite{venema92}) before. The main advantage here is the minimal setup and maintenance work needed. This kind of access restriction is important to be implemented. +Several ways to restrict access are available. The most simple one is restriction by the \NAME{IP} address. No extra complexity is added this way but the \NAME{IP} addresses need to be static or within known ranges. This approach is often used to allow relaying for local nets. The access check can be done by the \MTA\ or by a guard (e.g.\ \NAME{TCP} \name{Wrapper} \cite{venema92}) before. The main advantage here is the minimal setup and maintenance work needed. This kind of access restriction is important to be implemented. +\index{tcp wrapper} \index{access restriction} This authentication based on \NAME{IP} addresses is impossible in situations where hosts with changing \NAME{IP} addresses, that are not part of a known sub net, need access. Then an authentication mechanism based on some \emph{secret} is required. Three common approaches exist: @@ -155,22 +161,21 @@ \label{requirement-encryption} \index{enc} Electronic mail is vulnerable to sniffing attacks, because in generic \SMTP\ all data transfer is unencrypted.
The message's body, the header, and the envelope are all unencrypted. But also some authentication dialogs transfer plain text passwords (e.g.\ \NAME{PLAIN} and \NAME{LOGIN}). Hence encryption is throughout important. -\index{auth} +\index{plain text} The common way to encrypt \SMTP\ dialogs is using \name{Transport Layer Security} (short: \NAME{TLS}, the successor of \NAME{SSL}). \NAME{TLS} encrypts the datagrams of the \name{transport layer}. This means it works below the application protocols and can be used with any of them \citeweb{wikipedia:tls}. \index{tls} -\index{ssl} Using secure tunnels that are provided by external programs should be preferred over including encryption into the application, because the application needs not to bother with encryption then. Outgoing \SMTP\ connections can get encrypted using a secure tunnel, created by an external application (like \name{openssl}). But incoming connections can not use external secure tunnels, because the remote \NAME{IP} address is hidden then; all connections would appear to come from localhost instead. Figure~\ref{fig:stunnel} depicts the situation of using an application like \name{stunnel} for incoming connections. The connection to port 25 comes from localhost and this information reaches the \MTA. Authentication based on \NAME{IP} addresses and many spam prevention methods are useless then. \index{secure tunnel} \index{stunnel} +\index{openssl} \begin{figure} \begin{center} \includegraphics[scale=0.75]{fig/stunnel.eps} \end{center} \caption{Using \name{stunnel} for incoming connections} - \index{figure!Using \name{stunnel} for incoming connections} \label{fig:stunnel} \end{figure} @@ -179,6 +184,7 @@ \index{starttls} \NAME{STARTTLS}---defined in \RFC\,2487---is what \RFC\,3207 recommends to use for secure \SMTP. The connection then goes over port 25, but gets encrypted when the \NAME{STARTTLS} keyword is issued. 
Email depends on compatibility---only encryption methods that client and server support can be used. Hence it is best to act after the recommendations of the \RFC\ documents. This means \NAME{STARTTLS} encryption should be supported for incoming and for outgoing connections. +\index{rfc} @@ -195,7 +201,7 @@ Spam is usually identified by the results of a set of checks. Static rules, database querying (e.g.\ \NAME{DNS} blacklists \cite{cole07} \cite{levine08}), requesting special client behavior (e.g.\ \name{greylisting} \cite{harris03}, \name{hashcash} \cite{back02}), or statistical analysis (e.g.\ \name{bayesian filters} \cite{graham02}) are checks that may be used. Running more checks leads to better results, but takes more system resources and more time. \index{dns blacklist} \index{greylisting} -\index{hashcash} +\index{Hashcash} \index{bayesian filter} Doing some basic checks during the \SMTP\ dialog seems to be a must \cite[page~25]{eisentraut05}. Including these checks into the \MTA\ makes them fast to avoid \SMTP\ dialog timeouts. For modularity and reusability reasons internal interfaces to specialized modules seem to be best. \person{Raymond} says: ``Modularity (simple parts, clean interfaces) is a way to organize programs to make them simpler.'' \cite[chapter~1]{raymond03}. @@ -215,6 +221,7 @@ In any way should malware checking be performed by external programs that may be invoked by the \MTA. However, \NAME{MDA}s are better points to invoke content scanners. \index{content scanner} +\index{mda} A popular email filter framework is \name{amavis} which integrates various spam and malware scanners. The common setup includes a receiving \MTA\ which sends mail to \name{amavis} using \SMTP, \name{amavis} processes the mail and sends it then to a second \MTA\ that does the outgoing transfer. (This setup with two \MTA\ instances is discussed in more detail in section~\ref{sec:current-code-security}.) 
@@ -224,10 +231,13 @@ \label{rf10} \index{archiving} Mail archiving and auditability become more important as email establishes as technology for serious business communication. Archiving is a must for companies in many countries. In the United States, the \name{Sarbanes-Oxley Act} \cite{sox} covers this topic. +\index{Sarbanes-Oxley Act} It is a goal to have the ability to archive verbatim copies of every mail coming into and every mail going out of the system, with relation between them. \postfix\ for example has a \name{always\_bcc} feature, to send a copy of every outgoing mail to a definable recipient. At least this functionality should be given, although a more complete approach, like \qmail\ provides, is preferable. \qmail\ is able to save copies of all sent and received messages and additionally complete \SMTP\ dialogs \cite[page~12]{sill02}. +\index{postfix} +\index{qmail} \index{smtp!dialog} But if archiving is of high importance, a dedicated archiving solution is advisable, anyway. @@ -248,22 +258,26 @@ \masqmail\ needs to be secure enough for its target field of operation. \masqmail\ is targeted to workstations and private networks, with explicit warning to not use it on permanent online hosts \citeweb{masqmail:homepage2}. But as non-permanent online connections and trustable environments become rare, \masqmail's security should be so good that it is usable with permanent online connections and in unsafe environments. For example should mails with bad content not be able to break \masqmail. \index{masqmail!security} +\index{non-permanent online connection} \paragraph{\RG\,2: Reliability} \index{reliability} Reliability is the second essential quality property for an \MTA. Mail for which the \MTA\ took responsibility must never get lost while it is within the \MTA's responsibility. The \MTA\ must not be \emph{the cause} of any mail loss, no matter what happens. Unreliable \MTA{}s are of no value. 
However, as the mail transport infrastructure is a distributed system, one of the communication partners or the transport medium may crash at any time during mail transfer. Thus reliability is needed for mail transfer communication, too. +\index{smtp!responsibility} \index{mail loss} The goal is to transfer exactly one copy of the message. \person{Tanenbaum} evaluates the situation and comes to the conclusion that ``in general, there is no way to arrange this.'' \cite[pages~377--379]{tanenbaum02}. Only strategies where no mail gets lost are acceptable; he identifies three of them, but one generates more duplicates than the others, so two strategies remain. (1) The client always reissues the transfer. The server first sends an acknowledgment and then handles the transfer. (2) The client reissues the transfer only if no acknowledgment was received. The server first handles the transfer and sends the acknowledgment afterwards. The first strategy does not need acknowledgments at all, however, it will lose mail if the second transfer fails, too. Hence, mail transfer between two processes should use the strategy: The client reissues if it receives no acknowledgment. The server first handles the message and then sends the acknowledgment. This strategy only leads to duplicates if a crash happens in the time between the message is fully transferred to the server and the acknowledgment is received by the client. No mail will get lost. -\index{duplicates} +\index{duplicates of messages} \paragraph{\RG\,3: Robustness} \index{robustness} Being robust means handling errors properly. Small errors may get corrected, large errors may kill a process. Killed processes should get restarted automatically and lead to a clean state again. Log messages should be written in every case. Robust software does not need a special environment, it creates a friendly environment itself. 
\person{Raymond}'s \name{Rule of Robustness} and his \name{Rule of Repair} are good descriptions \cite[pages~18--21]{raymond03}. +\index{robustness!rule of} +\index{repair!rule of} \paragraph{\RG\,4: Extendability} @@ -292,31 +306,35 @@ \paragraph{\RG\,8: Availability} \index{availability} Availability is important for server programs. They must stay operational by blocking \name{denial of service} attacks and the like. Automated restarts into a clean state after fatal errors are also required. +\index{denial of service attack} \paragraph{\RG\,9: Portability} \index{portability} Source code that compiles and runs on various operation systems is called portable. Portability can be achieved by using standard features of the programming language and common libraries. Basic rules to achieve portable code are defined by \person{Kernighan} and \person{Pike} \cite{kernighan99}. Portable code lets software spread faster. Portability among the various flavors of Unix systems is a goal for \masqmail, because these systems are the ones \MTA{}s usually run on. No special care needs to be taken for non-Unix platforms. +\index{Unix} \paragraph{\RG\,10: Usability} \index{usability} Usability, not mentioned by \person{Hafiz} \cite{hafiz05} (he focuses on architecture) but by \person{Spinellis} \cite{spinellis06} and \person{Kan} \cite{kan03}, is a property which is very important from the user's point of view. Software with bad usability is rarely used, no matter how good it is. If substitutes with better usability exist, the user will switch to one of them. Here, usability includes setting up and configuring; the term ``users'' includes administrators. Having \MTA{}s on home servers and workstations requires easy and standardized configuration. The common setups should be configurable with little action by the user. Complex configuration should be possible, but the focus should be on the most common form of configuration: choosing one of several common setups. 
+\index{configuration} \subsection{Architecture} \label{sec:discussion-mta-arch} -\index{architecture} +\index{mta!architecture} \masqmail's current architecture is monolithic like \sendmail's and \exim's. But more than the other two is it one block of interweaved code. \exim\ has a highly structured code with many internal interfaces, a good example is the interface for authentication ``modules''. \sendmail\ provides now, with its \name{milter} interface, standardized connection channels to external modules. \masqmail\ has none of them---it is what \sendmail\ was in the beginning: a single large block. +\index{sendmail} +\index{exim} \index{milter} \index{masqmail!architecture} Figure~\ref{fig:masqmail-arch} is a call graph generated from \masqmail's source code. It gives an impression of how interweaved the internals are. There are no compartments at all. -\index{masqmail!call graph} \index{call graph} \begin{figure} @@ -325,25 +343,34 @@ \includegraphics[scale=0.75]{fig/callgraph.eps} \end{center} \caption{Internal structure of \masqmail, showed by a call graph. (Logging functions are ignored; test and \NAME{POP3} code is excluded.)} - \index{figure!Internal structure of \masqmail.} \label{fig:masqmail-arch} \end{figure} \sendmail\ improved its old architecture by adding the milter interface, to include further functionality by invoking external programs. \exim\ was designed, and is carefully maintained, with a modular-like code structure in mind. \qmail\ started from scratch with a ``security-first'' approach, \postfix\ improved on it, and \name{sendmail~X}/\name{MeTA1} tries to adopt the best of \qmail\ and \postfix\ to completely replace the old \sendmail\ architecture. \person{Hafiz} describes this evolution of \MTA\ architecture very well \cite{hafiz05}. 
+\index{sendmail} +\index{postfix} +\index{qmail} +\index{exim} +\index{sendmail!sendmailx} +\index{sendmail!meta1} \index{security} Every one of these programs is more modular, or became more modular over time, than \masqmail\ is. Modern requirements like spam protection and probable future requirements like the use of new mail transport protocols demand for modular designs in order to keep the software simple. Simplicity is a key property for security. ``[T]he essence of security engineering is to build systems that are as simple as possible.'' \cite[page 45]{graff03}. \index{modularity} \person{Hafiz} agrees: ``The goal of making software secure can be better achieved by making the design simple and easier to understand and verify.'' \cite[page 64]{hafiz05}. He identifies the security of \qmail\ to come from it's \name{compartmentalization}, which goes hand in hand with modularity: +\index{qmail} \index{compartmentalization} \begin{quote} A perfect example is the contrast between the feature envy early \sendmail\ architecture implemented as one process and the simple, modular architecture of \qmail. The security of \qmail\ comes from its compartmentalized simple processes that perform one task only and are therefore testable for security. \hfill\cite[page 64]{hafiz05} \end{quote} +\index{sendmail} +\index{qmail} Equal does \person{Dent} see the situation for \postfix: ``The modular architecture of Postfix forms the basis for much of its security.'' \cite[page 7]{dent04}. +\index{postfix} \index{modularity} Modularity is also needed to satisfy modern \MTA\ requirements in providing a clear interface to add functionality without increasing the overall complexity much. @@ -380,12 +407,13 @@ One single mail queue is used in \masqmail. It satisfies all current requirements. \paragraph{\RF\,3: Header sanitizing} -\index{header sanitizing} +\index{mail sanitizing} The envelope and mail headers are generated when the mail is put into the queue. 
The requirements are fulfilled. \paragraph{\RF\,4: Aliasing} -\index{aliases} -Aliasing is done on delivery. All common kinds of aliases in the global aliases file are supported. So called \name{.forward} aliasing is not supported, but this is less common and seldom used. +\index{alias expansion} +Alias expansion is done on delivery. All common kinds of aliases in the global aliases file are supported. So-called \name{.forward} aliasing is not supported, but this is less common and seldom used. +\index{alias expansion!.forward} \paragraph{\RF\,5: Route management} \index{online routes} @@ -394,6 +422,7 @@ \paragraph{\RF\,6: Authentication} \index{auth} Static authentication, based on \NAME{IP} addresses, can be achieved with \person{Venema}'s \NAME{TCP} \name{Wrapper} \cite{venema92}, by editing the \path{hosts.allow} and \path{hosts.deny} files. This is only relevant to authenticate hosts that try to submit mail into the system. Dynamic (secret-based) \SMTP\ authentication is already supported in form of \NAME{SMTP-AUTH} and \SMTP-after-\NAME{POP}, but only for outgoing connections. For incoming connections only address-based authentication is supported. +\index{tcp wrapper} \index{auth!smtp-after-pop} \index{auth!smtp-auth} @@ -401,10 +430,14 @@ \index{enc} Similar is the situation for encryption which is also only available for outgoing channels; here a tunnel application, like \name{openssl}, is needed. A secure tunnel can be created to send mail trough. State-of-the-art, however, is using \NAME{STARTTLS}, but this is not supported. For incoming channels, no encryption is available. The only possible setup to provide encryption of incoming channels is using an application like \name{stunnel} to crypt between the secure connection to the remote host and the plain connection to the \MTA. Unfortunately, this suffers from the problem explained on page \pageref{fig:stunnel} in figure~\ref{fig:stunnel}. Anyway, it would still be no \NAME{STARTTLS} support.
\index{secure tunnel} +\index{stunnel} +\index{starttls} +\index{openssl} \paragraph{\RF\,8: Spam handling} \index{spam!handling} \masqmail\ does not provide special support for spam filtering. Spam prevention by not accepting spam during the \SMTP\ dialog is not possible at all. Spam filtering is only possible by using two \masqmail\ instances with an external spam filter in between. The mail flow is from the receiving \MTA\ instance, which accepts mail, to the filter application that processes and possible modifies it, to the second \MTA\ which is responsible for further delivery of the mail. This is a concept that works in general, and it is good to separate different work with clear interfaces. But the need of two instances of the same \MTA, with doubled setup, makes it rather a work-around. Better is to have this data flow respected in the \MTA\ design, like it was done in \postfix. Anyway, the more important part of spam handling, for sure, is done during the \SMTP\ dialog by completely refusing unwanted mail. +\index{postfix} \paragraph{\RF\,9: Malware handling} \index{malware!handling} @@ -418,7 +451,8 @@ \paragraph{\RG\,1: Security} \index{security} -\masqmail's current security is bad. However, it seems acceptable for using \masqmail\ on workstations and private networks, if the environment is trustable and \masqmail\ is protected against remote attacks. In environments where untrusted components or persons have access to \masqmail, its security is too low. Its author states that \masqmail\ ``is not designed to'' such usage \citeweb{masqmail:homepage2}. This is a clear indicator for being careful. Issues like high memory consumption, low performance, and denial-of-service attacks---things not regarded by design---may cause serious problems. In any way, a security report that confirms \masqmail's security level is missing. +\masqmail's current security is bad. 
However, it seems acceptable for using \masqmail\ on workstations and private networks, if the environment is trustable and \masqmail\ is protected against remote attacks. In environments where untrusted components or persons have access to \masqmail, its security is too low. Its author states that \masqmail\ ``is not designed to'' such usage \citeweb{masqmail:homepage2}. This is a clear indicator for being careful. Issues like high memory consumption, low performance, and denial of service attacks---things not regarded by design---may cause serious problems. In any case, a security report that confirms \masqmail's security level is missing. +\index{denial of service attack} \index{masqmail!security} \masqmail\ uses conditional compilation to exclude unneeded functionality from the executable at compile time. Excluding code means excluding all bugs and weaknesses within this code, too. Excluding unused code is a good concept to improve security. @@ -443,10 +477,13 @@ \paragraph{\RG\,4: Extendability} \index{extendability} \masqmail's extendability is very poor. This is a general problem of monolithic software, but can though be provided with high effort. \exim\ is an example for good extendability in a monolithic program. +\index{exim} \paragraph{\RG\,5: Maintainability} \index{maintainability} The maintainability of \masqmail\ is equivalent to other software of similar kind. Missing modularity and therefore more complexity makes the maintainer's work harder. Conditional compilation might be good for security, but \name{ifdef}s scattered throughout the source code is a pain for maintenance. In summary is \masqmail's maintainability bearable, like in average Free Software projects. +\index{Free Software projects} +\index{conditional compilation} @@ -469,13 +506,17 @@ \paragraph{\RG\,9: Portability} \index{portability} The code's portability is good with view on Unix-like operation systems.
At least \name{Debian}, \name{Red Hat}, \NAME{SUSE}, \name{Slackware}, \name{Free}\NAME{BSD}, \name{Open}\NAME{BSD}, and \name{Net}\NAME{BSD} are reported to be able to compile and run \masqmail\ \citeweb{masqmail:homepage2}. Special requirements for the underlying file system are not known. Thus, the portability is already good. -\index{masqmail!supported systems} +\index{bsd} +\index{Unix} +\index{masqmail} \paragraph{\RG\,10: Usability} \index{usability} The usability is very good, from the administrator's point of view. \masqmail\ was developed to suite a specific, limited job---its configuration does perfect match. The user's view does not reach to the \MTA, as it is hidden behind the \MUA. Configuration could be eased even more by providing configuration generators that enable \masqmail\ to be used right ``out of the box'' after running one of several configuration scripts for common setups. This would improve \masqmail's usability for not technical educated people. \index{out-of-the-box usage} +\index{configuration} +\index{mua} @@ -490,14 +531,12 @@ \input{tbl/requirements.tbl} \end{center} \caption{Importance of and pending work for requirements} - \index{table!Importance of and pending work for requirements} \label{tab:requirements} \end{table} The importance is ranked from `-{}-' (not important) to `++' (very important). The pending work is ranked from `-{}-' (nothing) to `++' (very much). Large work tasks with high importance need to receive much attention, they need to be in focus. In contrast should small, low importance work tasks receive few attention. Here the focus for a task is calculated by summing up the importance and the pending work with equal weight. Normally, tasks with high focus are the ones of high priority and should be done first. The functional requirements that receive highest attention are \RF\,6 (authentication), \RF\,7 (encryption), and \RF\,8 (spam handling). 
Of the non-functional requirements, \RG\,1 (security), \RG\,2 (reliability), and \RG\,4 (extendability), rank highest. -\index{requirements!ranking} These tasks are presented in more detail in a todo list, now. The list is sorted by focus and then by importance. @@ -505,6 +544,7 @@ \subsubsection*{\TODO\,1: Encryption (\RF\,7)} \index{enc} Encryption is chosen for number one as it is essential to provide privacy. Using \NAME{STARTTLS} for encryption is definitely needed and should be added first; encrypted data transfer is hardly possible without support for it. +\index{starttls} \subsubsection*{\TODO\,2: Authentication (\RF\,6)} @@ -544,7 +584,7 @@ \section{Ways for further development} -\index{development strategies} +\index{development!strategy} Knowing what needs to be done is only one part, the other is deciding \emph{how} to do it by focusing on a global development strategy. @@ -572,7 +612,6 @@ \input{tbl/strategies.tbl} \end{center} \caption{Development strategies and their suitability for requirements} - \index{figure!Development strategies and their suitability for requirements} \label{tab:strategies} \end{table} @@ -628,16 +667,17 @@ \subsubsection*{Effort estimation} -\index{effort estimation} +\index{development!work effort} Although a strategy might lead to the best result, one may choose another one if the required effort is too high. The effort for a redesign and rebuild is estimated now. \person{Wheeler}'s program \name{sloccount} calculates following estimations for \masqmail's code base as of version 0.2.21 (excluding library code): -\index{masqmail!development effort} +\index{lines of code} \codeinput{input/masqmail-sloccount.txt} The development costs in money are not relevant for a Free Software project with volunteer developers, but the development time is. About 24 man-months are estimated. The current code base was written almost completely by \person{Oliver Kurth} within four years in his spare time. 
This means he needed around twice as much time. Of course, he programmed as a volunteer developer not as an employee with eight work-hours per day. +\index{Free Software projects} Given the assumptions that (1) an equal amount of code needs to be produced for a new designed \masqmail, (2) a third of the existing code can be reused plus concepts and knowledge, and (3) development speed is like \person{Kurth}'s, then it would take between two and three years for one programmer to produce a redesigned new \masqmail\ with the same features that \masqmail\ now has. Less time would be needed if a simpler architecture allows faster development, better testing, and less bugs. Of course, more developers would speed it up, too. @@ -645,7 +685,7 @@ \subsubsection*{Risks} -\index{risks} +\index{development!risks} The gained result of a new design might still outweigh the development effort. But risks are something more to consider. @@ -674,16 +714,18 @@ \subsubsection*{Repairing} -\index{reparing} +\index{repair} Besides these advantages of existing code, one must not forget that further work on it is often repair work. Small bug fixes are not the problem, but adding something for which the software originally was not designed, will cause problems. Such work often destroys the clear concepts of the software, especially in interweaved monolithic code. \person{Doug McIlroy}, a person with important influence on Unix especially by inventing the Unix pipe, demands: ``To do a new job, build afresh rather than complicate old programs by adding new features.'' \cite{mcilroy78}. +\index{pipe} +\index{Unix} Repair strategies are useful, but only in the short-time view and in times of trouble. If the future is bright, however, one does best by investing into a software. As shown in section~\ref{sec:market-analysis-conclusion}, the future for \MTA{}s is bright. This means it is time to invest into a redesign with the intension to build up a more modern product. 
In the author's view is \masqmail\ already needing this redesign since about 2003 when the old design was still quite suitable \dots\ it already delayed too long. -\index{masqmail!redesign} +\index{masqmail!new design} %Clinging to much to existing code will be no help, it is an indicator for fear. Having the courage to through bad code away to make it better, shows the view forward. @@ -695,11 +737,13 @@ \subsubsection*{A guard against dead ends} -\index{dead ends} +\index{development!dead end} A new design does protect against such dead ends. Changing requirements are one possible dead end if the software does not evolve with them. A famous example is \sendmail; it had an almost monopoly for a long time. But when security became important, \sendmail\ was only repaired instead of the problem sources---its insecure design---would have been removed. Thus security problems reappeared and over the years \sendmail's market share shrank as more secure \MTA{}s became available. \sendmail's reaction to the new requirements, in form of \name{sendmail~X} and \name{MeTA1}, came much to late---the users already switched to other \MTA{}s. +\index{sendmail!sendmailx} +\index{sendmail!meta1} \index{sendmail} Redesigning a software as requirements change helps keeping it alive. @@ -708,7 +752,9 @@ The knowledge of \person{Heraclitus}, a Greek philosopher, shall be an inspriation: ``Nothing endures but change.'' Another danger is the dead end of complexity which is likely to appear by constant work on the same code base. It is even more likely if the code base has a monolithic architecture. A good example for simplicity is \qmail\ which consists of small independent modules, each with only about one thousand lines of code. Such simple code makes it obvious to understand what it does. 
The \name{suckless} project \citeweb{suckless.org} for example advertises such a philosophy of small and simple software by following the thoughts of the Unix inventors \cite{kernighan84} \cite{kernighan99}. Simple, small, and clear code avoids complexity and is thus also a strong prerequisite for security. -\index{suckless} +\index{qmail} +\index{suckless software} +\index{Unix} @@ -720,6 +766,7 @@ The avoidance of dead ends is essential for further development on current code, too. Hence it is mandatory to refactor the existing code base sooner or later. Most important is the intention to modularize it, as modularity improves many quality properties, eases further development, and essentially improves security. One example how modular structure makes it easy to add further functionality is described by \person{Sill}: He says that integrating the \name{amavis} filter framework into the \qmail\ system can be done by simply renaming the \path{qmail-queue} module to \path{qmail-queue-real} and then renaming the \path{amavis} executable to \path{qmail-queue} \cite[section~12.7.1]{sill02}. Nothing more in the \qmail\ system needs to be changed. This is a very admirable ability which is only possible in a modular system that consists of independent executables. +\index{qmail} \index{modularity} This thesis showed several times that modularity is a key property for good software design. Modularity can hardly be retrofitted into software, hence development on base of current code will need a throughout restructuring too, to modularize the source code. Thus a new design is similar to such a throughout refactoring, except the dependence on current code. @@ -737,6 +784,7 @@ This classifies current code to be better suited for adding functionality, and a new design to be better suited for quality improvements. Both strategies need to improve function as well as quality, however, the focus of the strategy is determined by this difference. 
Easier work is likely to be done earlier in Free Software projects than hard work. Thus, by choosing S\,1+2 volunteer developers tend to implement function first and delay quality improvements, no matter what the suggested order of the work tasks is. S\,3, in contrast, would benefit early quality improvements and later function improvements. This is real-life experience from Free Software development. +\index{Free Software projects} @@ -776,9 +824,9 @@ \subsubsection*{Good software, good feelings} -\index{good feeling} One last argument shall be added. This one is more common to Free Software but can also be found in non-free software. +\index{Free Software} Free Software ``sells'' if it has a good user base. For example: Although \qmail\ is somehow outdated and its author has not released any new version since about ten years, \qmail\ still has a very strong user base and community. \index{qmail} @@ -786,7 +834,7 @@ Good concepts, sound design, and a sane philosophy gives users good feelings for the software and faith in it. They become interested in using it and to contribute. In contrast do constant repaire work and reappearance of weaknesses leave a bad feeling. The motivation of most volunteer developers is their wish to do good work with the goal to create good software. Projects that follow admirable plans towards a good product will motivate volunteers to help. More helpers can get the 2,5 man-years for a new design in less absolute time done. Additionally is a good developers base the best start for a good user base, and users define a software's value. -\index{motivation} +\index{development!motivation} @@ -802,7 +850,7 @@ Strategy 3 (A new design) is slightly preferred over the combination of strategy 1 (Improve existing code) and 2 (Add wrappers and interposition filters), from the requirement's point of view. The discussion afterwards did generally support the new design strategy. But some arguments stood against it. 
These were: -\index{development strategy} +\index{development!strategy} \begin{enumerate} \item The development time and effort @@ -816,7 +864,7 @@ With respect to the current situation, the suggested further development plan for \masqmail\ is split into a short-time plan and a long-time plan: -\index{development goal} +\index{development!goal} \begin{enumerate} \item The short-time plan: Add the most needed features, namely encryption, authentication, and security wrappers, to the current code base. @@ -828,6 +876,6 @@ The basics of this development idea can be described as: Recurrent development of a new design from scratch, while the old version is still in use and gets repaired. Hence a modern design will inherit an old one in periodic intervals. This is a very future-proof concept that combines the best of short-term and long-term planning. The price to pay is only the increased work, which gets covered by volunteers that \emph{want} to do it. -\index{motivation} +\index{development!motivation} diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/5-Improvements.tex --- a/thesis/tex/5-Improvements.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/5-Improvements.tex Sun Feb 08 22:51:42 2009 +0100 @@ -20,6 +20,7 @@ Encryption (\TODO\,1) should be the first functionality to be added to the current code. The requirement was already discussed on page~\pageref{requirement-encryption}. As explained there, \NAME{STARTTLS} encryption---defined in \RFC\,2487---should be added to \masqmail. \index{starttls} +\index{rfc} This work requires changes mainly in three source files: \path{smtp_in.c}, \path{smtp_out.c}, and \path{conf.c}. @@ -29,6 +30,7 @@ The second file includes the functionality for the \SMTP\ client. It should start the encryption by issuing the \NAME{STARTTLS} keyword if the server supports it. It should be possible to send messages over encrypted channels only. The third file controls the configuration files. New configuration options need to be added. 
The encryption policy for incoming connections needs to be defined. Three choices seem necessary: no encryption, offer encryption, insist on encryption. The encryption policy for outgoing connections should be part of each route setup. The options are the same: never encrypt, encrypt if possible, insist on encryption. +\index{configuration} \subsubsection*{Depencencies} @@ -47,6 +49,7 @@ \person{Frederik Vermeulen} wrote an encryption patch for \qmail\ which adds \NAME{STARTTLS} support \citeweb{qmail:tls-patch}. This patch includes about 500 lines of code. \index{qmail} +\index{starttls} Adding this code in a similar form to \masqmail\ will be fairly easy. It will save a lot of work as it is not necessary to write the code completely from scratch. @@ -60,8 +63,10 @@ Authentication (\TODO\,2) is the second function to be added. It is important to restrict the access to \masqmail, especially for mail relay. The requirements for authentication where identified on page~\pageref{requirement-authentication}. -Static access restriction, based on the \NAME{IP} address is already possible by using \NAME{TCP} \name{Wrappers}. This makes it easy to refuse all connections from outside the local network for example, which is a good prevention against being an open relay. More detailed static restrictions, like splitting between mail for users on the system and mail for relay, should \emph{not} be added to the current code. This is a concern for the new design. -\index{tcp wrappers} +Static access restriction, based on the \NAME{IP} address, is already possible by using \NAME{TCP} \name{Wrapper}. This makes it easy to refuse all connections from outside the local network, for example, which is a good prevention against being an open relay. More detailed static restrictions, like splitting between mail for users on the system and mail for relay, should \emph{not} be added to the current code. This is a concern for the new design.
+\index{access restriction} +\index{tcp wrapper} +\index{open relay} \subsubsection*{One of the dynamic methods} @@ -71,7 +76,7 @@ Authentication based on certificates does suffer from the certificate infrastructure that is required. Although certificates are already used for encryption, its management overhead prevented wide spread usage for authentication. \SMTP\ authentication (also referred to as \NAME{SMTP-AUTH}) support is easiest attained by using a \name{Simple Authentication and Security Layer} (short: \NAME{SASL}) implementation. \person{Dent} sees in \NAME{SASL} the best solution for dynamic authentication of users: -\index{smtp-auth} +\index{auth!smtp-auth} \index{sasl} \begin{quote} @@ -80,12 +85,15 @@ \end{quote} These days \NAME{SMTP-AUTH}---defined in \RFC\,2554---is supported by almost all email clients. If encryption is used then even insecure authentication methods like \NAME{PLAIN} and \NAME{LOGIN} become secure. +\index{rfc} \subsubsection*{Simple Authentication and Security Layer} \index{sasl} \masqmail\ best uses an available \NAME{SASL} library. \name{Cyrus} \NAME{SASL} is used by \postfix\ and \sendmail. It is a complete framework that makes use of existing authentication concepts like the \path{passwd} file or \NAME{PAM}. As advantage it can be included in existing user data bases. \name{gsasl} is an alternative. It comes as a library which helps with the decision for a method and with generating the appropriate dialog data; the actual transmission of the data and the authentication against some database is left open to the programmer. \name{gsasl} is used, for instance, by \name{msmtp}. It seems best to give both concepts a try and decide then which one to use. +\index{sendmail} +\index{postfix} \index{cyrus sasl} \index{pam} \index{gsasl} @@ -95,6 +103,8 @@ Authentication needs code changes in the same places as encryption. The relevant code files are \path{smtp_in.c}, \path{smtp_out.c}, and \path{conf.c}. 
The server code, to authenticate clients, must be added to \path{smtp_in.c} and the configuration options to \path{conf.c}. Several configuration options should be provided: the authentication policy (no authentication, offer authentication, insist on authentication), the authentication backend (if several are supported), an option to refuse plain text methods (\NAME{PLAIN} and \NAME{LOGIN}), and one to require encryption before authentication. +\index{configuration} +\index{plain text} If the authentication code for outgoing connects shall be changed too, it must be done in \path{smtp_out.c}. The configuration options are already present. @@ -131,8 +141,11 @@ The advantage of mail security layers is that the \MTA\ itself needs not to bother much with untrusted environments. The proxy cares for this. \name{smap} is non-free software and thus no general choice for \masqmail. A way to achieve a similar setup is to copy \masqmail\ and strip one copy to the bare minimum of what is needed for the proxy job. \name{setuid} could be removed, and root privilege too if \name{inetd} is used. This hardens the proxy instance. +\index{Free Software} \index{inetd} \index{proxy} +\index{root privilege} +\index{setuid} Mail from outside would then come through the proxy into the system. Mail from the local host and from the local network could be directly accepted by the normal \masqmail, if those locations are considered trusted. But it seems better to have them use the proxy, too, or maybe a second proxy instance with different policy. \index{policy} @@ -143,15 +156,13 @@ \subsubsection*{A concrete setup} A stripped down proxy needs to be created. It should only be able to receive mail via \SMTP, encrypt the communication, authenticate clients, and send mail out via \SMTP\ to an internal socket (named ``X'' in the figure). This is a straight forward task. The normal \masqmail\ instance runs on the system, too. 
It takes input from \name{stdin} (when the \path{sendmail} command is invoked) and via \SMTP\ where it listens on an internal socket (named ``X'' in the figure). Outgoing mail is handled without difference to a regular setup. Figure~\ref{fig:proxy-setup} depicts the setup. -\index{auth} -\index{enc} +\index{sendmail!command} \begin{figure} \begin{center} \includegraphics[scale=0.75]{fig/proxy-setup.eps} \end{center} \caption{A setup with a proxy} - \index{figure!A setup with a proxy} \label{fig:proxy-setup} \end{figure} @@ -211,7 +222,7 @@ \index{sendmail!command} A bit different is the structure of \name{sendmail~X} at that point: Locally submitted messages go also to the \SMTP\ daemon, which is the only connection to the mail queue. \person{Finch} proposes a similar approach \cite{finch-sendmail}: He wants the \path{sendmail} command to be a simple \SMTP\ client that contacts the \SMTP\ daemon of the \MTA, like it is done by connections from remote. The advantage here is to have one single module where all \SMTP\ dialog with submitters is done. Hence one single point to accept or refuse incoming mail. Additionally does the module which puts mail into the queue not need to be \name{setuid} or \name{setgid}, because it is only invoked from the \SMTP\ daemon. The \MTA's architecture would become simpler and common tasks are not duplicated in modules that do similar jobs. -\index{sendmailx} +\index{sendmail!sendmailx} \index{smtp} \index{setuid} @@ -234,12 +245,19 @@ Outgoing mail is commonly either sent using \SMTP, piped into local commands (for example \path{uucp}), or delivered locally by appending to a mailbox. The requirements were identified on page~\pageref{rf1}. \index{uucp} +\index{pipe} Outgoing channels are similar for \qmail, \postfix, and \name{sendmail~X}: All of them have a module to send mail using \SMTP\ and one for writing into a local mailbox. 
Local mail delivery is a job that should have root privilege to be able to switch to any user in order to write to his mailbox. Modular \MTA{}s do not require \name{setuid root} but the local delivery process (or its parent) should run as root. root privilege is not a mandatory requirement but any other approach has some disadvantages thus commonly root privilege is used. +\index{postfix} +\index{qmail} +\index{sendmail!sendmailx} \index{setuid} +\index{root privilege} Local mail delivery should not be done by the \MTA, but by an \NAME{MDA} instead. This decision was discussed in section~\ref{sec:functional-requirements}. This means only an outgoing channel that pipes mail into a local command is required for local delivery. \index{local delivery} +\index{mda} +\index{pipe} Other outgoing channels, one for each supported protocol, should be designed like it was done in other \MTA{}s. @@ -249,10 +267,12 @@ \index{mail queue} The mail queue is the central part of an \MTA. This fact demands especially for robustness and reliability as a failure here can lead to mail loss. (See \RF\,2 on page~\pageref{rf2}.) +\index{mail loss} Common \MTA{}s feature one or more mail queues, they sometimes have effectively several queues within one physical representation. \MTA\ setups that include content scanning tend to require two separate queues. To use \sendmail\ in such setups requires two independent instances with one own queue each. \exim\ can handle it with special \name{router} and \name{transport} rules but the data flow gets complicated. Hence an idea is to use two queues (\name{incoming} and \name{active} in \postfix's terminology) and have the content scanning within the move from the one to the other. +\index{sendmail} \index{exim} \index{postfix} @@ -262,6 +282,8 @@ \index{database system} Hence the choice here is having a directory with simple text files in it. This is straight forward, simple, clear, and general \dots\ and thus a good basis for reliability. 
It is additionally always an advantage if data is stored in the operating system's natural form, which is plain text in the Unix' case. +\index{Unix} +\index{plain text} Robustness of the queue is covered in the next section. @@ -285,18 +307,21 @@ \person{Jon Postel}'s robustness principle\footnote{``Be liberal in what you accept, and conservative in what you send.''. In this wording in \RFC\,1122 and in different wordings in numerous \RFC{}s} should be respected in the \name{scanning} module. The module should parse the given input in a liberal way and generate clean output. \person{Raymond}'s \name{Rule of Repair}\footnote{``Repair what you can -- but when you must fail, fail noisily and as soon as possible.'' \cite[page~18]{raymond03}} can be applied, too. But it is important to repair only obvious problems, because repairing functionality is likely a target for attacks. \index{robustness!principle of} +\index{repair!rule of} +\index{rfc} \subsubsection*{Aliasing} -\index{aliases} +\index{alias expansion} The functional requirements were identified under \RF\,4 on page~\pageref{rf4}. From the architectural point of view, the main question about aliasing is: Where should aliases get expanded? Two facts are important to consider: (1) Addresses that expand to a list of users lead to more envelopes. (2) Aliases that change the recipient's domain part may make the message unsuitable for a specific online route. Aliasing is often handled by expanding the alias and re-injecting the mail into the system. Unfortunately, the mail is processed twice then; additionally does the system have to handle more mail this way. If it is wanted to check the new recipient address for acceptance and do all processing again, then re-injecting it is the best choice. But already accepted messages may get rejected in the second go, though the replacement address was set inside the system. This seems not to be wanted. 
+\index{smtp!rejecting} Doing the alias expansion in the \name{scanning} module appears to be the best solution. Unfortunately, a second alias expansion must be made on delivery, because only then is clear which route is used for the message. This compromise should get accepted. @@ -315,6 +340,7 @@ The best point to archive copies of every incoming mail is the \name{queue-in} module, respectively the \name{queue-out} module for copies of outgoing mail. But the changes that are made by the receiving modules (adding further headers) and sending modules (address rewrites) are not respected with this approach. \qmail\ has the ability to log complete \SMTP\ dialogs. Logging the complete data transaction into and out of the system is a great feature which should be implemented into each receiving and sending module. Though, as this will produce a huge amount of output, it should be disabled by default. +\index{qmail} \index{smtp!dialog} Archiving's functional requirements were described as \RF\,10 on page~\pageref{rf10}. @@ -348,12 +374,15 @@ \begin{enumerate} \item Refusing spam during the \SMTP\ dialog: This is the way it was meant by the designers of the \SMTP\ protocol. They thought checking the sender's and recipient's mail addresses would be enough, but as they are forgeable, it is not. More and more complex checks are needed to be done. Checking needs time, but \SMTP\ dialogs time out if it takes too long. Thus during the \SMTP\ dialog, only limited time can be used for checking if a message seems to be spam. The advantage of this approach is that bad messages can simply get refused---no responsibility for them is taken and no further system load is added. See \RFC\,2505 (especially section 1.5) for detail. +\index{smtp!responsibility} \index{smtp!dialog} +\index{rfc} \item Checking for spam after the mail was accepted and queued: Here it is possible to invest more processing time, thus more detailed checks can be done. 
But, as responsibility for messages was taken, it is no choice to simply delete spam mail. Checks for spam do not lead to sure results, they just indicate the possibility the message is unwanted mail. \person{Eisentraut} lists actions to take after a message is recognized as probably spam \cite[pages 18--20]{eisentraut05}. For mail the \MTA\ is responsible for, the only acceptable action is adding further or rewriting existing header lines. Thus all further work on the spam messages is the same as for non-spam messages. \end{enumerate} Modern \MTA{}s use both techniques in combination. Checks during the \SMTP\ dialog tend to be implemented in the \MTA\ to make them fast; checks after the message was queued are often done using external programs (\name{spamassassin} is a well known one). \person{Eisentraut} sees the checks during the \SMTP\ dialog to be essential: ``Ganz ohne Analyse w\"ahrend der \SMTP-Phase kommt sowieso kein \MTA\ aus, und es ist eine Frage der Einsch\"atzung, wie weit man diese Phase belasten m\"ochte.'' \cite[page 25, (translated: ``No \MTA\ can go without analysis during the \SMTP\ phase anyway, but the amount of stress one likes to put on this phase is left to his discretion.'')]{eisentraut05} +\index{spamassassin} Checks before a message is accepted, like \NAME{DNS} blacklists and \name{greylisting}, need to be invoked from within the receiving modules. Like for authentication and encryption, the implementation of this functionality should be provided by a central source. \index{dns blacklist} @@ -406,7 +435,6 @@ \includegraphics[width=\textwidth]{fig/masqmail-arch-new.eps} \end{center} \caption{The new designed architecture for \masqmail} - \index{figure!The new designed architecture for \masqmail} \label{fig:masqmail-arch-new} \end{figure} @@ -425,7 +453,9 @@ \paragraph{Receiver modules} \index{incoming channels} They are the communication interface between external senders and the \name{queue-in} module. 
Each protocol needs a corresponding \name{receiver module} to be supported. Most popular is the \name{sendmail} module, which is a command to be called from the local host, and the \name{smtpd} module which usually listens on port 25. Other modules to support other protocols may be added as needed. Receiving modules that need to listen on ports should get invoked by \name{inetd}, or by \person{Bernstein}'s more secure \name{ucspi-tcp}. This makes it possible to run them with least privilege. -\index{least privilege} +\index{inetd} +\index{inetd!ucspi-tcp} +\index{least privilege, principle of} \paragraph{The \name{queue-in} module} @@ -445,6 +475,10 @@ \paragraph{Transport modules} \index{outgoing channels} These modules send outgoing mail; they are the interface between \name{queue-out} and remote hosts or local commands. The most popular modules of this kind are the \name{smtp} module which acts as an \SMTP\ client and the \name{pipe} module to interface gateways to other systems or networks like \NAME{FAX} and \NAME{UUCP}. A module for local delivery is not included; \masqmail\ passes this job to an \NAME{MDA} which gets invoked through the \name{pipe} module. (See section~\ref{sec:functional-requirements} for reasons.) +\index{fax} +\index{gateway} +\index{mda} +\index{pipe} @@ -455,14 +489,14 @@ The queuing system consists of two queues and a message pool. The queues store the spool files---in unprocessed form in \name{incoming} and in complete and valid form in \name{outgoing}. The \name{pool} is the storage of the data files. On disk, the three parts of the queuing system are represented by three directories within the queue path. The representation of queued messages on disk is basically the same as in current \masqmail: One file for the envelope and message header information (the ``spool file'') and a second file for the message body (the ``data file''). 
-\index{spool file} -\index{data file} The currently used internal structure of the spool files can remain. Following is a sample spool file from current \masqmail. The first part is the envelope and meta information. The annotations in parenthesis are only added to ease the understanding. The second part, after the empty line, is the message header. \codeinput{input/sample-spool-file.txt} The spool file owner's executable bit shows if a file is ready for further processing: The module that writes the file into the queue sets the bit as last action. Modules that read from the queue can process messages that have the bit set. This approach is derived from \postfix. +\index{executable bit} +\index{postfix} The data file is stored into the \name{pool} by \name{queue-in}; it never gets modified until it is deleted by \name{queue-out}. They consist of data in local default text format. @@ -474,13 +508,14 @@ \index{ipc} Communication between modules is required to exchange data and status information. This is also called ``Inter-process communication'' (short: \NAME{IPC}) because the modules are independent programs in this case and processes are programs in execution. +\index{ipc} The connections between \name{queue-in} and \name{scanning}, as well as between \name{scanning} and \name{queue-out}, is provided by the queues, only signals might be useful to trigger runs. Communication between receiver and transport modules and the outside world is organized by their specific protocol (e.g.\ \SMTP). Left is only the communication between the receiver modules and \name{queue-in}, and between \name{queue-out} and the transport modules. Suggested for this communication is a simple protocol with data exchange through Unix pipes. Figure~\ref{fig:ipc-protocol} shows a state diagram for the protocol. 
+\index{pipe} The protocol is described in more detail now: -\index{protocol} \paragraph{Timing} One dialog consists of exactly three phases: (1) The connection attempt, (2) The envelope and header transfer, and (3) The transfer of the message body. The order is always the same. The three phases are all initiated by the client process. After each phase the server process sends a success or failure reply. Timeouts for each phase need to be implemented. @@ -490,7 +525,6 @@ \includegraphics[scale=0.75]{fig/ipc-protocol.eps} \end{center} \caption{State diagram of the \NAME{IPC} protocol. (Solid lines indicate client actions, dashed lines indicate server responses.)} - \index{figure!State diagram of the \NAME{IPC} protocol.} \label{fig:ipc-protocol} \end{figure} @@ -501,6 +535,10 @@ \paragraph{Syntax} Data transfer is done by sending plain text data. \name{Line Feed} (`\texttt{\textbackslash{}n}')---the native line separator on Unix---is used as line separator. The terminator sequence used to indicate the end of the data transfer is the \NAME{ASCII} \name{null} character (`\texttt{\textbackslash0}'). Replies are one-digit numbers with `\texttt{0}' meaning success and any other number (`\texttt{1}'--`\texttt{9}') indicating failure. +\index{Line Feed} +\index{ascii} +\index{Unix} +\index{plain text} @@ -510,6 +548,10 @@ \index{permission} The set of system users that is required for \qmail\ seems to be too complex for \masqmail. One system user, like \postfix\ uses, is more appropriate. \name{root} privilege and \name{setuid} permission should be avoided if feasible. +\index{system user management} +\index{postfix} +\index{qmail} +\index{root privilege} The \name{queue-in} module is the part of the system that is most critical about permission. It either needs to run as a daemon or be \name{setuid} or \name{setgid} in order to avoid a world-writable queue. 
\person{Ian~R.\ Justman} recommends to use \name{setgid} in this situation: \index{setuid} @@ -522,14 +564,20 @@ \person{Bernstein} chose \name{setuid} for the \name{qmail-queue} module, \person{Venema} uses \name{setgid} in \postfix, yet the differences are small. Either of them is better than running the module as a daemon. A daemon needs more resources and therefore becomes inefficient on systems with low mail amount, like the ones \masqmail\ will probably run on. Short running processes are additionally higher obstacles for intruders, because a process will die soon if an intruder managed to take one over. \index{qmail} \index{postfix} +\index{setuid} The modules \name{scanning} and \name{queue-out} are candidates for all-time running daemon processes. Alternatively they could be started by \name{cron} to do single runs. +\index{cron} Another possibility is to run a master process as daemon which starts and restarts the system parts. \postfix\ has such a master process, \qmail\ lacks it. The jobs of a master process can be done by other tools of the operating system too, thus making a master process dispensable. \masqmail\ does probably better go without a master process, because it aims to save resources, not to get the best performance. +\index{qmail} \index{master process} A sane permission management is very important for secure software in general. The \name{principle of least privilege} \cite[section~I.A.3.f]{saltzer75}, as it is often called, should be respected. If it is possible to use lower privilege then it should be done. An example for doing so is the \name{smtpd} module. It is a server module which listens on a port. One way is to start it as root and let it bind to the port and drop all privilege before it does any other work. But root privilege is avoidable completely if \name{inetd}, or one of its substitutes, listens on the port instead of the \name{smtpd} module. 
\name{inetd} will then launch the \name{smtpd} module to handle the connection whenever a connection attempt to the port is made. The \name{smtpd} module needs no privilege at all this way. +\index{least privilege, principle of} +\index{inetd} +\index{root privilege} diff -r d6ff5728dcd1 -r e57129f57faa thesis/tex/rfcs.tex --- a/thesis/tex/rfcs.tex Sat Feb 07 23:48:48 2009 +0100 +++ b/thesis/tex/rfcs.tex Sun Feb 08 22:51:42 2009 +0100 @@ -1,8 +1,10 @@ \chapter*{Requests for Comments} -\name{Requests for Comments} are the documents that propose or define Internet standards and best practices. They are controlled by the \name{Internet Engeneering Task Force} (short: \NAME{IETF}) \citeweb{ietf:homepage}. +\name{Requests for Comments} are the documents that propose or define Internet standards and best practices. They are controlled by the \name{Internet Engineering Task Force} (short: \NAME{IETF}) \citeweb{ietf:homepage}. +\index{ietf} A particular \RFC\ is located at {\small\url{http://tools.ietf.org/rfc/rfcNNNN.txt}}\,, where ``\texttt{NNNN}'' is the four-digit number of that \RFC. For example, \RFC\,821 is located at {\small\url{http://tools.ietf.org/rfc/rfc0821.txt}}\,. +\index{rfc} More comfortable to browse are the \NAME{HTML} formatted representations which contain navigation hyperlinks. They are accessible at {\small\url{http://tools.ietf.org/html/rfcNNNN}}\,. @@ -22,3 +24,10 @@ \item[\RFC\,5322] \emph{Internet Message Format} \end{description} +\index{smtp} +\index{mail message} +\index{smtps} +\index{spam} +\index{auth!smtp-auth} +\index{starttls} +