[darcs-users] darcs patch: Add tests for email header formatting (and 2 more)
Eric Kow
kowey at darcs.net
Sun Mar 15 18:03:47 UTC 2009
Hi Petr,
Could I get the final go-ahead on this one, please?
Thanks!
On Sun, Mar 15, 2009 at 17:46:41 +0100, Reinier Lamers wrote:
> Resolve oversight (an import line in External.hs), and a conflict with
> Trent's changes. And a wake-up call to potential reviewers :)
>
> Mon Mar 9 21:37:39 CET 2009 Reinier Lamers <tux_rocker at reinier.de>
> * Add tests for email header formatting
>
> Sun Mar 15 16:21:04 CET 2009 Reinier Lamers <tux_rocker at reinier.de>
> * resolve issue1358: encode non-ASCII characters in mail headers
>
> Sun Mar 15 17:22:54 CET 2009 Reinier Lamers <tux_rocker at reinier.de>
> * Kill unused imports in External.hs
>
Add tests for email header formatting
-------------------------------------
> Reinier Lamers <tux_rocker at reinier.de>**20090309203739
> Ignore-this: 1f0357f84f64446d4f4d8ea98b05b383
> ] hunk ./src/unit.lhs 53
> import System.IO.Unsafe ( unsafePerformIO )
> import ByteStringUtils
> import qualified Data.ByteString.Char8 as BC ( unpack, pack )
> -import qualified Data.ByteString as B ( empty, concat )
> +import qualified Data.ByteString as B ( empty, concat, length, unpack, foldr,
> + cons, ByteString, null, filter, head )
> +import Data.Char ( isPrint )
> import Darcs.Patch
> import Darcs.Patch.Test
> import Darcs.Patch.Unit ( run_patch_unit_tests )
> hunk ./src/unit.lhs 71
> import Control.Monad.ST
> import Darcs.Ordered
> import Darcs.Sealed ( Sealed(Sealed), unsafeUnseal )
> +import Darcs.Email ( make_email, read_email, formatHeader )
>
> hunk ./src/unit.lhs 73
> -import Darcs.Email ( make_email, read_email )
> #include "impossible.h"
> \end{code}
>
> hunk ./src/unit.lhs 103
> BC.unpack (read_email (renderPS
> $ make_email "reponame" [] (Just (text "contents\n"))
> (text $ unlines s) (Just "filename")))
> + putStr "Checking email header line length... "
> + quickCheck email_header_no_long_lines
> + putStr "Checking email for illegal characters... "
> + quickCheck email_header_ascii_chars
> + putStr "Checking for spaces at beginning of folded email header lines... "
> + quickCheck email_header_lines_start
> + putStr "Checking that there are no empty lines in email headers... "
> + quickCheck email_header_no_empty_lines
> --putStr $ test_patch
> --exitWith ExitSuccess
> case run_tests returnval of
> hunk ./src/unit.lhs 238
> = (thetest p1 p2)++(pair_unit_tester thetest ps)
> \end{code}
>
> +\chapter{Email format tests}
> +
> +These tests check whether the emails generated by darcs meet a few criteria.
> +We check for line length and non-ASCII characters. We apparently do not have to
> +check for CR-LF newlines because that's handled by sendmail.
> +
> +\begin{code}
> +
> +-- Check that formatHeader never creates lines longer than 78 characters
> +-- (excluding the carriage return and line feed)
> +email_header_no_long_lines :: String -> String -> Bool
> +email_header_no_long_lines field value =
> + not $ any (>78) $ map B.length $ bs_lines $ formatHeader cleanField value
> + where cleanField = clean_field_string field
> +
> +bs_lines :: B.ByteString -> [B.ByteString]
> +bs_lines = finalizeFold . B.foldr splitAtLines (B.empty, [])
> + where splitAtLines 10 (thisLine, prevLines) = (B.empty, thisLine:prevLines)
> + splitAtLines c (thisLine, prevLines) = (B.cons c thisLine, prevLines)
> + finalizeFold (lastLine, otherLines) = lastLine : otherLines
> +
> +-- Check that an email header does not contain non-ASCII characters
> +-- formatHeader doesn't escape field names, there is no such thing as non-ascii
> +-- field names afaik
> +email_header_ascii_chars :: String -> String -> Bool
> +email_header_ascii_chars field value
> + = not (any (>127) (B.unpack (formatHeader cleanField value)))
> + where cleanField = clean_field_string field
> +
> +-- Check that header the second and later lines of a header start with a space
> +email_header_lines_start :: String -> String -> Bool
> +email_header_lines_start field value =
> + all (\l -> B.null l || B.head l == 32) (tail headerLines)
> + where headerLines = bs_lines (formatHeader cleanField value)
> + cleanField = clean_field_string field
> +
> +-- Checks that there are no lines in email headers with only whitespace
> +email_header_no_empty_lines :: String -> String -> Bool
> +email_header_no_empty_lines field value =
> + all (not . B.null . B.filter (not . (`elem` [10, 32, 9]))) headerLines
> + where headerLines = bs_lines (formatHeader cleanField value)
> + cleanField = clean_field_string field
> +
> +clean_field_string :: String -> String
> +clean_field_string = filter (\c -> isPrint c && c < '\x80' && c /= ':')
> +
> +\end{code}
> +
> \chapter{LCS}
>
> Here are a few quick tests of the shiftBoundaries function.
> addfile ./tests/emailformat.sh
> hunk ./tests/emailformat.sh 1
> +#!/usr/bin/env bash
> +
> +set -ev
> +# TODO: is this really enough to make all commands interpret the given strings
> +# as latin1?
> +export LANG="en_US.ISO-8859-1"
> +
> +rm -rf temp1
> +rm -rf temp2
> +mkdir temp1
> +mkdir temp2
> +cd temp1
> +
> +seventysevenaddy="<aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa at bbbbbbbbbb.cccccccccc.abrasoft.com>"
> +
> +darcs init
> +
> +echo "Have you seen the sm?rrebr?d of Ren? ?av?sant?" > non_ascii_file
> +darcs add non_ascii_file
> +darcs record -am "non-ascii file add" -A test
> +
> +cd ../temp2
> +darcs init
> +cd ../temp1
> +
> +# long email adress: check that email adresses of <= 77 chars don't get split up
> +darcs send --from="Kj?lt ?berstr?m $seventysevenaddy" \
> + --subject "Un patch pour le r?positoire" \
> + --to="Un gar?on fran?ais <garcon at francais.fr>" \
> + --sendmail-command='cp /dev/stdin mail_as_file %<' \
> + -a ../temp2
> +
> +cat mail_as_file
> +# The long mail address should be in there as a whole
> +grep $seventysevenaddy mail_as_file
> +
> +# Check that there are no non-ASCII characters in the mail
> +ghc -e 'getContents >>= return . not . any (> Data.Char.chr 127)' < mail_as_file | grep '^True$'
> +
> +
> +cd ..
> +rm -rf temp1
> +rm -rf temp2
> +
resolve issue1358: encode non-ASCII characters in mail headers
--------------------------------------------------------------
> Reinier Lamers <tux_rocker at reinier.de>**20090315152104
> Ignore-this: 1006f69ae92586298ab4949813b0f288
> ] hunk ./src/Darcs/Email.hs 3
> {-# OPTIONS_GHC -cpp #-}
> {-# LANGUAGE CPP #-}
> -module Darcs.Email ( make_email, read_email ) where
> +module Darcs.Email ( make_email, read_email, formatHeader ) where
>
> hunk ./src/Darcs/Email.hs 5
> -import Data.Char ( digitToInt, isHexDigit )
> +import Data.Char ( digitToInt, isHexDigit, ord, intToDigit, isPrint, toUpper )
> +import Data.List ( isInfixOf )
> +import qualified UTF8 ( encode )
> import Printer ( Doc, ($$), (<+>), (<>), text, empty, packedString, renderPS)
>
> import ByteStringUtils (dropSpace, linesPS, betweenLinesPS )
> hunk ./src/Darcs/Email.hs 11
> -import qualified Data.ByteString as B (ByteString, length, null, tail, drop, head)
> +import qualified Data.ByteString as B (ByteString, length, null, tail
> + ,drop, head, concat, singleton
> + ,pack, append, empty
> + )
> import qualified Data.ByteString.Char8 as BC (index, head, pack)
> #if __GLASGOW_HASKELL__ > 606
> import Data.ByteString.Internal as B (c2w, createAndTrim)
> hunk ./src/Darcs/Email.hs 26
> import Foreign.Storable ( poke )
> import Data.Word ( Word8 )
>
> -line_max :: Int
> -line_max = 75
> +-- line_max is maximum number of characters in an e-mail line excluding the CRLF
> +-- at the end. qline_max is the number of characters in a q-encoded or
> +-- quoted-printable-encoded line.
> +line_max, qline_max :: Int
> +line_max = 78
> +qline_max = 75
> +
> +-- | Formats an e-mail header by encoding any non-ascii characters using UTF-8
> +-- and Q-encoding, and folding lines at appropriate points. It doesn't do
> +-- more than that, so the header name and header value should be
> +-- well-formatted give or take line length and encoding. So no non-ASCII
> +-- characters within quoted-string, quoted-pair, or atom; no semantically
> +-- meaningful signs in names; no non-ASCII characters in the header name;
> +-- etcetera.
> +formatHeader :: String -> String -> B.ByteString
> +formatHeader headerName headerValue =
> + B.append nameColon encodedValue
> + where nameColon = B.pack (map B.c2w (headerName ++ ":")) -- space for folding
> + encodedValue = fold_and_encode (' ':headerValue)
> + (B.length nameColon) False False
> +
> +-- run through a string and encode non-ascii words and fold where appropriate.
> +-- the integer argument is the current position in the current line.
> +-- the string in the first argument must begin with whitespace, or be empty.
> +fold_and_encode :: String -> Int -> Bool -> Bool -> B.ByteString
> +fold_and_encode [] _ _ _ = B.empty
> +fold_and_encode s p lastWordEncoded inMidWord =
> + let newline = B.singleton 10
> + space = B.singleton 32
> + s2bs = B.pack . map B.c2w
> + -- the twelve there is the max number of ASCII chars to encode a single
> + -- character: 4 * 3, 4 UTF-8 bytes times 3 ASCII chars per byte
> + safeEncChunkLength = (qline_max - B.length encoded_word_start
> + - B.length encoded_word_end) `div` 12
> + (curSpace, afterCurSpace) = break (not . (== ' ')) s
> + (curWord, afterCurWord) = break (== ' ') afterCurSpace
> + qEncWord | lastWordEncoded = qEncode (curSpace ++ curWord)
> + | otherwise = qEncode curWord
> + mustEncode = inMidWord
> + || any (\c -> not (isPrint c) || (ord c) > 127) curWord
> + || length curWord > line_max - 1
> + || isInfixOf "=?" curWord
> + mustFold
> + | mustEncode && lastWordEncoded
> + = p + 1 + B.length qEncWord > line_max
> + | mustEncode
> + = p + length curSpace + B.length qEncWord > line_max
> + | otherwise
> + = p + length curSpace + length curWord > line_max
> + mustSplit = (B.length qEncWord > qline_max && mustEncode)
> + || length curWord > line_max - 1
> + spaceToInsert | mustEncode && lastWordEncoded = space
> + | otherwise = s2bs curSpace
> + wordToInsert
> + | mustEncode && mustSplit = qEncode (take safeEncChunkLength curWord)
> + | mustEncode = qEncWord
> + | otherwise = s2bs curWord
> + doneChunk | mustFold = B.concat [newline, spaceToInsert, wordToInsert]
> + | otherwise = B.concat [spaceToInsert, wordToInsert]
> + (rest, nextP)
> + | mustSplit
> + = (drop safeEncChunkLength curWord ++ afterCurWord, qline_max + 1)
> + | mustEncode && mustFold
> + = (afterCurWord, B.length spaceToInsert + B.length wordToInsert)
> + | otherwise
> + = (afterCurWord, p + B.length doneChunk)
> + in B.append doneChunk (fold_and_encode rest nextP mustEncode mustSplit)
> +
> +-- | Turns a piece of string into a q-encoded block
> +-- Applies q-encoding, for use in e-mail header values, as defined in RFC 2047.
> +-- It just takes a string and builds an encoded-word from it, it does not check
> +-- length or necessity.
> +qEncode :: String -> B.ByteString
> +qEncode s = B.concat [encoded_word_start,
> + encodedString,
> + encoded_word_end]
> + where encodedString = B.concat (map q_encode_char s)
> +
> +encoded_word_start, encoded_word_end :: B.ByteString
> +encoded_word_start = B.pack (map B.c2w "=?UTF-8?Q?")
> +encoded_word_end = B.pack (map B.c2w "?=")
> +
> +-- turns a character into its q-encoded bytestring value. For most printable
> +-- ASCII characters, that's just the singleton bytestring with that char.
> +q_encode_char :: Char -> B.ByteString
> +q_encode_char c
> + | c == ' ' = c2bs '_'
> + | isPrint c
> + && not (c `elem` ['?', '=', '_'])
> + && ord c < 128 = c2bs c
> + | otherwise = B.concat (map qbyte (UTF8.encode [c]))
> + where c2bs = B.singleton . B.c2w
> + -- qbyte turns a byte into its q-encoded "=hh" representation
> + qbyte b = B.pack (map B.c2w ['='
> + ,word8ToUDigit (b `div` 16)
> + ,word8ToUDigit (b `mod` 16)
> + ])
> + word8ToUDigit :: Word8 -> Char
> + word8ToUDigit = toUpper . intToDigit . fromIntegral
>
> -- TODO is this doing mime encoding??
> qpencode :: B.ByteString -> B.ByteString
> hunk ./src/Darcs/Email.hs 130
> qpencode s = unsafePerformIO
> -- Really only (3 + 2/75) * length or something in the worst case
> - $ B.createAndTrim (4 * B.length s) (\buf -> encode s line_max buf 0)
> + $ B.createAndTrim (4 * B.length s) (\buf -> encode s qline_max buf 0)
>
> encode :: B.ByteString -> Int -> Ptr Word8 -> Int -> IO Int
> encode ps _ _ bufi | B.null ps = return bufi
> hunk ./src/Darcs/Email.hs 137
> encode ps n buf bufi = case B.head ps of
> c | c == newline ->
> do poke (buf `plusPtr` bufi) newline
> - encode ps' line_max buf (bufi+1)
> + encode ps' qline_max buf (bufi+1)
> | n == 0 && B.length ps > 1 ->
> do poke (buf `plusPtr` bufi) equals
> poke (buf `plusPtr` (bufi+1)) newline
> hunk ./src/Darcs/Email.hs 141
> - encode ps line_max buf (bufi + 2)
> + encode ps qline_max buf (bufi + 2)
> | (c == tab || c == space) ->
> if B.null ps' || B.head ps' == newline
> then do poke (buf `plusPtr` bufi) c
> hunk ./src/Darcs/Email.hs 147
> poke (buf `plusPtr` (bufi+1)) equals
> poke (buf `plusPtr` (bufi+2)) newline
> - encode ps' line_max buf (bufi + 3)
> + encode ps' qline_max buf (bufi + 3)
> else do poke (buf `plusPtr` bufi) c
> encode ps' (n - 1) buf (bufi + 1)
> | (c >= bang && c /= equals && c <= tilde) ->
> hunk ./src/Darcs/External.hs 60
> import ByteStringUtils (gzReadFilePS, linesPS, unlinesPS)
> import qualified Data.ByteString as B (ByteString, empty, null, readFile -- ratify readFile: Just an import from ByteString
> ,hGetContents, writeFile, hPut, length -- ratify hGetContents: importing from ByteString
> - ,take, concat, drop, isPrefixOf)
> + ,take, concat, drop, isPrefixOf, singleton, append)
> import qualified Data.ByteString.Char8 as BC (unpack, pack)
>
> import Darcs.Lock ( withTemp, withOpenTemp, tempdir_loc, removeFileMayNotExist )
> hunk ./src/Darcs/External.hs 83
> import Printer ( Doc, Printers, putDocLnWith, hPutDoc, hPutDocLn, hPutDocWith, ($$), (<+>), renderPS,
> simplePrinters,
> text, empty, packedString, vcat, renderString )
> +import Darcs.Email ( formatHeader )
> +#include "impossible.h"
>
> sendmail_path :: IO String
> sendmail_path = do
> hunk ./src/Darcs/External.hs 421
> -> Doc -- ^ body
> -> IO ()
> generateEmail h f t s cc body = do
> - hPutDocLn h $
> - text "To:" <+> text t
> - $$ text "From:" <+> text f
> - $$ text "Subject:" <+> text s
> - $$ formated_cc
> - $$ text "X-Mail-Originator: Darcs Version Control System"
> - $$ text ("X-Darcs-Version: " ++ darcs_version)
> - $$ body
> - where formated_cc = if cc == ""
> - then empty
> - else text "Cc:" <+> text cc
> + putHeader "To" t
> + putHeader "From" f
> + putHeader "Subject" s
> + when (not (null cc)) (putHeader "Cc" cc)
> + putHeader "X-Mail-Originator" "Darcs Version Control System"
> + hPutDocLn h body
> + where putHeader field value
> + = B.hPut h (B.append (formatHeader field value) newline)
> + newline = B.singleton 10
>
> have_sendmail :: IO Bool
> have_sendmail = (sendmail_path >> return True) `catch` (\_ -> return False)
Kill unused imports in External.hs
----------------------------------
> Reinier Lamers <tux_rocker at reinier.de>**20090315162254
> Ignore-this: 216f740bf207738dfda5939f68ce9f5b
> ] hunk ./src/Darcs/External.hs 65
>
> import Darcs.Lock ( withTemp, withOpenTemp, tempdir_loc, removeFileMayNotExist )
> import CommandLine ( parseCmd, addUrlencoded )
> -import ThisVersion ( darcs_version )
> #if defined(HAVE_CURL) || defined(HAVE_HTTP)
> import URL ( copyUrl, copyUrlFirst, waitUrl )
> #else
> hunk ./src/Darcs/External.hs 79
> import Exec ( exec, Redirect(..), withoutNonBlock )
> import Darcs.URL ( is_file, is_url, is_ssh )
> import Darcs.Utils ( catchall )
> -import Printer ( Doc, Printers, putDocLnWith, hPutDoc, hPutDocLn, hPutDocWith, ($$), (<+>), renderPS,
> +import Printer ( Doc, Printers, putDocLnWith, hPutDoc, hPutDocLn, hPutDocWith, ($$), renderPS,
> simplePrinters,
> text, empty, packedString, vcat, renderString )
> import Darcs.Email ( formatHeader )
> hunk ./src/Darcs/External.hs 83
> -#include "impossible.h"
>
> sendmail_path :: IO String
> sendmail_path = do
>
--
Eric Kow <http://www.nltg.brighton.ac.uk/home/Eric.Kow>
PGP Key ID: 08AC04F9
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 194 bytes
Desc: not available
URL: <http://lists.osuosl.org/pipermail/darcs-users/attachments/20090315/756687b3/attachment.pgp>
More information about the darcs-users
mailing list