[darcs-users] [patch260] Implement darcs optimize --http (and 2 more)

Petr Rockai me at mornfall.net
Wed Jun 23 20:47:31 UTC 2010


Hi,

Add --http flag for optimize
----------------------------

> hunk ./src/Darcs/Arguments.lhs 89
>                           networkOptions, noCache,
>                           allowUnrelatedRepos,
>                           checkOrRepair, justThisRepo, optimizePristine,
> -                         getOutput
> +                         optimizeHTTP, getOutput
>                        ) where
>  import System.Console.GetOpt
>  import System.Directory ( doesDirectoryExist )
> hunk ./src/Darcs/Arguments.lhs 320
>  getContent Repair = NoContent
>  getContent JustThisRepo = NoContent
>  getContent OptimizePristine = NoContent
> +getContent OptimizeHTTP = NoContent
>  
>  getContentString :: DarcsFlag -> Maybe String
>  getContentString f =
> hunk ./src/Darcs/Arguments.lhs 1611
>  optimizePristine :: DarcsOption
>  optimizePristine = DarcsNoArgOption [] ["pristine"] OptimizePristine
>                            "optimize hashed pristine layout"
> +
> +optimizeHTTP :: DarcsOption
> +optimizeHTTP = DarcsNoArgOption [] ["http"] OptimizeHTTP
> +                          "optimize repository for getting over network"
>  \end{code}
>  \begin{options}
>  --umask
> hunk ./src/Darcs/Flags.hs 92
>                 | UseFormat2
>                 | PristinePlain | PristineNone | NoUpdateWorking
>                 | Sibling AbsolutePath | Relink | RelinkPristine | NoLinks
> -               | OptimizePristine
> +               | OptimizePristine | OptimizeHTTP
>                 | UpgradeFormat
>                 | Files | NoFiles | Directories | NoDirectories
>                 | Pending | NoPending
Ok.

Refactor Darcs.Repository.copyInventory (consistent naming)
-----------------------------------------------------------

> hunk ./src/Darcs/Repository.hs 100
>  import Darcs.Witnesses.Ordered ( FL(..), RL(..), bunchFL, mapFL, mapRL
>                                 , reverseRL ,lengthRL, (+>+) )
>  import Darcs.Patch.Info ( PatchInfo )
> -import Darcs.Repository.Format ( RepoProperty ( HashedInventory ),
> +import Darcs.Repository.Format ( RepoProperty ( HashedInventory ), RepoFormat,
>                                   createRepoFormat, formatHas, writeRepoFormat )
>  import Darcs.Repository.Prefs ( writeDefaultPrefs )
>  import Darcs.Repository.Pristine ( createPristine, flagsToPristine, createPristineFromWorking )
> hunk ./src/Darcs/Repository.hs 158
>  
>  data RepoSort = Hashed | Old
>  
> +repoSort :: RepoFormat -> RepoSort
> +repoSort f
> +  | formatHas HashedInventory f = Hashed
> +  | otherwise = Old
> +
>  copyInventory :: forall p C(r u t). RepoPatch p => Repository p C(r u t) -> IO ()
> hunk ./src/Darcs/Repository.hs 164
> -copyInventory fromrepo@(Repo fromdir opts rf (DarcsRepository _ cremote)) = do
> -  repo@(Repo todir xx rf2 (DarcsRepository yy c)) <- identifyRepositoryFor fromrepo "."
> -  newcache <- unionRemoteCaches c cremote fromdir
> -  let newrepo :: Repository p C(r u t)
> -      newrepo = Repo todir xx rf2 (DarcsRepository yy newcache)
> -      copyHashedHashed = HashedRepo.copyRepo newrepo opts fromdir
> -      copyAnythingToOld r = withCurrentDirectory todir $ readRepo r >>=
> +copyInventory fromRepo@(Repo fromDir opts fromFormat (DarcsRepository _ fromCache)) = do
> +  toRepo@(Repo toDir opts' toFormat (DarcsRepository toPristine toCache)) <-
> +    identifyRepositoryFor fromRepo "."
> +  toCache2 <- unionRemoteCaches toCache fromCache fromDir
> +  let toRepo2 :: Repository p C(r u t)
> +      toRepo2 = Repo toDir opts' toFormat $ DarcsRepository toPristine toCache2
It's more common to use ' (prime) as a suffix in Haskell than 2 (the
latter usually means 2-argument, like liftM2...)

> +      copyHashedHashed = HashedRepo.copyRepo toRepo2 opts fromDir
> +      copyAnythingToOld r = withCurrentDirectory toDir $ readRepo r >>=
>                              DarcsRepo.writeInventoryAndPatches opts
I'd say copyAnyToOld is a better name than copyAnythingToOld. (But now I
see this is not a name you introduced -- you can still rename it if you
are amending or extending this, though...)

> hunk ./src/Darcs/Repository.hs 173
> -      repoSort rfx | formatHas HashedInventory rfx = Hashed
> -                   | otherwise = Old
> -  case repoSort rf2 of
> -    Hashed ->
> -        if formatHas HashedInventory rf
> -        then copyHashedHashed
> -        else withCurrentDirectory todir $
> -             do HashedRepo.revertTentativeChanges
> -                patches <- readRepo fromrepo
> +  case repoSort fromFormat of
> +    Hashed -> case repoSort toFormat of
> +      Hashed -> copyHashedHashed
> +      Old -> copyAnythingToOld fromRepo
> +    Old -> case repoSort toFormat of
> +      Hashed -> withCurrentDirectory toDir $ do
> +                HashedRepo.revertTentativeChanges
> +                patches <- readRepo fromRepo
>                  let k = "Copying patch"
>                  beginTedious k
>                  tediousSize k (lengthRL $ newset2RL patches)
> hunk ./src/Darcs/Repository.hs 185
>                  let patches' = progressPatchSet k patches
> -                HashedRepo.writeTentativeInventory c (compression opts) patches'
> +                HashedRepo.writeTentativeInventory toCache {- toCache2? -} (compression opts) patches'

I think toCache is OK, since it's what the original code did.

>                  endTedious k
> hunk ./src/Darcs/Repository.hs 187
> -                HashedRepo.finalizeTentativeChanges repo (compression opts)
> -    Old -> case repoSort rf of
> -           Hashed -> copyAnythingToOld fromrepo
> -           _ -> copyOldrepoPatches opts fromrepo todir
> +                HashedRepo.finalizeTentativeChanges toRepo {- toRepo2? -} (compression opts)
> +      Old -> copyOldrepoPatches opts fromRepo toDir
Again, toRepo should be OK.

Create a function for lazy fetching files
-----------------------------------------

(maybe fix the patch title here to say "fetching of files"?)

> hunk ./src/Darcs/External.hs 7
>      backupByRenaming, backupByCopying,
>      copyFileOrUrl, speculateFileOrUrl, copyFilesOrUrls, copyLocal, cloneFile,
>      cloneTree, cloneTreeExcept, clonePartialsTree, clonePaths,
> -    fetchFilePS, gzFetchFilePS,
> +    fetchFilePS, fetchFileLazyPS, gzFetchFilePS,
>      sendEmail, generateEmail, sendEmailDoc, resendEmail,
>      signString, verifyPS,
>      execDocPipe, execPipeIgnoreError,
> hunk ./src/Darcs/External.hs 64
>              ,hGetContents, writeFile, hPut, length
>              ,take, concat, drop, isPrefixOf, singleton, append)
>  import qualified Data.ByteString.Char8 as BC (unpack, pack)
> +import qualified Data.ByteString.Lazy as BL
>  
>  import Darcs.Lock ( withTemp, withOpenTemp, tempdirLoc, removeFileMayNotExist )
>  import CommandLine ( parseCmd, addUrlencoded )
> hunk ./src/Darcs/External.hs 138
>                                              copyFileOrUrl opts fou t cache
>                                              B.readFile t
>  
> +fetchFileLazyPS :: String -> Cachable -> IO BL.ByteString
> +fetchFileLazyPS fou _ | isFile fou = BL.readFile fou
> +fetchFileLazyPS fou cache = withTemp $ \t -> do let opts = [] -- FIXME: no network flags
> +                                                copyFileOrUrl opts fou t cache
> +                                                BL.readFile t
> +
>  gzFetchFilePS :: String -> Cachable -> IO B.ByteString
>  gzFetchFilePS fou _ | isFile fou = gzReadFilePS fou
>  gzFetchFilePS fou cache = withTemp $ \t-> do let opts = [] -- FIXME: no network flags
Ok, although it should be noted that the lazy readFile may constitute a
resource (fd) leak -- a haddock explaining that would be certainly
appropriate. (I.e. this behaves the same as Prelude.readFile -- see
contrib/darcs-errors.hlint in your darcs source tree for explanation.)

Implement darcs optimize --http
-------------------------------

Ok, the main patch...

> hunk ./src/Darcs/Commands/Optimize.lhs 24
>  {-# LANGUAGE CPP #-}
>  
>  module Darcs.Commands.Optimize ( optimize ) where
> +import Control.Applicative ( (<$>) )
: - )

>  import Control.Monad ( when, unless )
>  import Data.Maybe ( isJust )
>  import System.Directory ( getDirectoryContents, doesDirectoryExist, doesFileExist )
> hunk ./src/Darcs/Commands/Optimize.lhs 29
>  import qualified Data.ByteString.Char8 as BS
> +import qualified Data.ByteString.Lazy as BL
>  
>  import Storage.Hashed.Darcs( decodeDarcsSize )
>  
> hunk ./src/Darcs/Commands/Optimize.lhs 38
>  import Darcs.Arguments ( DarcsFlag( UpgradeFormat, UseHashedInventory,
>                                      Compress, UnCompress,
>                                      NoCompress, Reorder,
> -                                    Relink, RelinkPristine, OptimizePristine ),
> +                                    Relink, RelinkPristine, OptimizePristine,
> +                                    OptimizeHTTP ),
>                          reorderPatches,
>                          uncompressNocompress,
>                          relink, relinkPristine, sibling,
> hunk ./src/Darcs/Commands/Optimize.lhs 45
>                          flagsToSiblings,
>                          upgradeFormat,
> -                        workingRepoDir, umaskOption, optimizePristine
> +                        workingRepoDir, umaskOption, optimizePristine,
> +                        optimizeHTTP
>                        )
>  import Darcs.Repository.Prefs ( getPreflist )
>  import Darcs.Repository ( Repository,
> hunk ./src/Darcs/Commands/Optimize.lhs 91
>  import Storage.Hashed.Plain( readPlainTree )
>  import Storage.Hashed.Darcs( writeDarcsHashed )
>  
> +import Codec.Archive.Tar ( write )
> +import Codec.Archive.Tar.Entry ( fileEntry, toTarPath )
> +import Codec.Compression.GZip ( compress )
> +
>  #include "gadts.h"
>  
>  optimizeDescription :: String
> hunk ./src/Darcs/Commands/Optimize.lhs 138
>                                                   sibling, relink,
>                                                   relinkPristine,
>                                                    upgradeFormat,
> -                                                 optimizePristine]}
> +                                                 optimizePristine,
> +                                                 optimizeHTTP]}
>  
>  optimizeCmd :: [DarcsFlag] -> [String] -> IO ()
>  optimizeCmd origopts _ = do
> hunk ./src/Darcs/Commands/Optimize.lhs 145
>      when (UpgradeFormat `elem` origopts) optimizeUpgradeFormat
>      withRepoLock opts $- \repository -> do
> +    when (OptimizeHTTP `elem` origopts) doOptimizeHTTP
>      if (OptimizePristine `elem` opts)
>         then doOptimizePristine repository
>         else do cleanRepository repository
So far so good.

> hunk ./src/Darcs/Commands/Optimize.lhs 368
>      withCurrentDirectory dir $ do
>        gzs <- filter ((== ".gz") . takeExtension) `fmap` getDirectoryContents "."
>        mapM_ removeFile gzs
> +
> +doOptimizeHTTP :: IO ()
> +doOptimizeHTTP = do
> +  rf <- either fail return =<< identifyRepoFormat "."
> +  unless (formatHas HashedInventory rf) $ fail
> +    "Unsupported repository format"
The error message should explicitly say what was expected: "Only hashed
repositories can be optimized for HTTP" or something in that vein.

> +  createDirectoryIfMissing False packsDir
> +  i <- fileEntry' $ darcsdir </> "hashed_inventory"
> +  is <- tarDarcsDir "inventories"
> +  pr <- tarDarcsDir "pristine.hashed"
> +  BL.writeFile (packsDir </> "basic.tar.gz") . compress $ write (i : (is ++ pr))
> +  ps <- tarDarcsDir' "patches" $ \x -> all (x /=) ["unrevert", "pending",
> +    "pending.tentative"]
> +  BL.writeFile (packsDir </> "patches.tar.gz") . compress $ write ps
> + where
> +  packsDir = darcsdir </> "packs"
> +  fileEntry' x = do
> +    content <- BL.fromChunks . return <$> gzReadFilePS x
> +    tp <- either fail return $ toTarPath False x
> +    return $ fileEntry tp content
> +  dirContents d f = map (d </>) . filter (\x -> head x /= '.' && f x) <$>
> +    getDirectoryContents d
> +  tarDarcsDir d = tarDarcsDir' d $ const True
> +  tarDarcsDir' d f = mapM fileEntry' =<< dirContents (darcsdir </> d) f
>  \end{code}
Looks OK, although I would like to hear from you about memory behaviour
of the code, as discussed before (IIRC). :)

> hunk ./src/Darcs/Repository.hs 48
[SNIP pile of import wibbling]

> hunk ./src/Darcs/Repository.hs 131
> +import qualified Data.ByteString.Lazy as BL
>  
>  #include "impossible.h"
>  
> hunk ./src/Darcs/Repository.hs 235
>             return IsPartial
>  
>  copyFullRepository :: forall p C(r u t). RepoPatch p => Repository p C(r u t) -> IO ()
> -copyFullRepository fromrepository@(Repo fromdir opts rffrom _) = do
> -  copyInventory fromrepository
> +copyFullRepository fromRepo@(Repo fromDir opts _ _) = do
>    debugMessage "Copying prefs"
> hunk ./src/Darcs/Repository.hs 237
> -  copyFileOrUrl opts (fromdir++"/"++darcsdir++"/prefs/prefs") (darcsdir++"/prefs/prefs") (MaxAge 600)
> -                     `catchall` return ()
> +  copyFileOrUrl opts (fromDir ++ "/" ++ darcsdir ++ "/prefs/prefs")
> +    (darcsdir ++ "/prefs/prefs") (MaxAge 600) `catchall` return ()
(about reformatting: I am not complaining about how it looks now, but it
helps review to do formatting changes in separate patch that says it's
just formatting)

> +  b <- (Just <$> fetchFileLazyPS (fromDir ++ "/" ++ darcsdir ++
> +    "/packs/basic.tar.gz") Uncachable) `catchall` return Nothing
> +  case b of
> +    Nothing -> copyNotPackedRepository fromRepo
> +    Just b' -> copyPackedRepository fromRepo b'
> +
> +copyNotPackedRepository :: forall p C(r u t). RepoPatch p => Repository p C(r u t) -> IO ()
> +copyNotPackedRepository fromrepository@(Repo _ opts rffrom _) = do
The "NotPacked" in the name is a bit edgy, but I can't think of anything
better that's also clear enough, so keep it as it is.

> +  copyInventory fromrepository
>    debugMessage "Grabbing lock in new repository..."

> hunk ./src/Darcs/Repository.hs 249
> -  withRepoLock opts $- \torepository@(Repo _ _ rfto (DarcsRepository _ c)) ->
> +  withRepoLock opts $- \torepository@(Repo _ _ rfto _) ->
>        if formatHas HashedInventory rffrom && formatHas HashedInventory rfto
>        then do debugMessage "Writing working directory contents..."
>                createPristineDirectoryTree torepository "."
Is this just a warning fix?

> hunk ./src/Darcs/Repository.hs 268
>                     debugMessage "Writing the pristine"
>                     pristineFromWorking torepository

> +copyPackedRepository :: forall p C(r u t). RepoPatch p =>
> +  Repository p C(r u t) -> BL.ByteString -> IO ()
> +copyPackedRepository fromRepo@(Repo fromDir opts _ (DarcsRepository _ fromCache)) b = do
> +  Repo toDir _ toFormat (DarcsRepository toPristine toCache) <-
> +    identifyRepositoryFor fromRepo "."
> +  toCache2 <- unionRemoteCaches toCache fromCache fromDir
> +  let toRepo :: Repository p C(r u t)
> +      toRepo = Repo toDir opts toFormat $ DarcsRepository toPristine toCache2
> +      fromPacksDir = fromDir ++ "/" ++ darcsdir ++ "/packs/"
> +  createDirectoryIfMissing False $ toDir </> darcsdir </> "inventories"
> +  createDirectoryIfMissing False $ toDir </> darcsdir </> "pristine.hashed"
> +  createDirectoryIfMissing False $ toDir </> darcsdir </> "patches"
> +  copySources toRepo fromDir
> +  -- unpack inventory & pristine cache
> +  writeCompressed . Tar.read $ decompress b
> +  createPristineDirectoryTree toRepo "."
For all I can tell, this function is a complete misnomer: what this does
is copy the existing pristine into the working copy. (!) It is out of
scope for this patch, but I am noting down that it needs to be audited
and renamed.

> +  -- pull new patches
> +  us <- readRepo toRepo
> +  them <- readRepo fromRepo
> +  comm :\/: unc <- return $ findCommonAndUncommon us them
Hm, this is my sin, but the findCommonAndUncommon function actually does
not return any "common" patches. I will rename it later... You probably
want to rename "comm" and "unc" to "us'" and "them'".

> +  revertTentativeChanges
This might be redundant, but let's keep it in for a good measure.

> +  Sealed pw <- tentativelyMergePatches toRepo "get" opts comm unc
us' them' (due to above)

> +  invalidateIndex toRepo
> +  withGutsOf toRepo $ do
> +    finalizeRepositoryChanges toRepo
> +    applyToWorking toRepo opts pw
> +    return ()
Ok.

> +  -- get old patches
> +  writeCompressed . Tar.read . decompress =<< fetchFileLazyPS (fromPacksDir ++
> +    "patches.tar.gz") Uncachable
Great. We should also make this interruptible later, like normal "get"
is, with the result of getting a lazy repository. You can do this in a
followup patch and I won't hold up pushing this just for that.

> + where
> +  writeCompressed Tar.Done = return ()
> +  writeCompressed (Tar.Next x xs) = case Tar.entryContent x of
> +    Tar.NormalFile x' _ -> do
> +      let p = Tar.entryPath x
> +      BL.writeFile p $ if "hashed_inventory" `isSuffixOf` p
> +        then x'
> +        else compress x'
> +      writeCompressed xs
> +    _ -> fail "Unexpected non-file tar entry"
> +  writeCompressed (Tar.Fail e) = fail e
OK.

>  -- | writePatchSet is like patchSetToRepository, except that it doesn't
>  -- touch the working directory or pristine cache.
>  writePatchSet :: RepoPatch p => PatchSet p C(Origin x) -> [DarcsFlag] -> IO (Repository p C(r u t))
> hunk ./src/Darcs/Repository.hs 411
>          withCurrentDirectory dir $ readWorking >>= replacePristine repo
>  pristineFromWorking (Repo dir _ _ (DarcsRepository p _)) =
>    withCurrentDirectory dir $ createPristineFromWorking p
> +
> hunk ./src/Darcs/Repository/HashedRepo.hs 29
>                                       addToTentativeInventory, removeFromTentativeInventory,
>                                       readRepo, readTentativeRepo, writeAndReadPatch,
>                                       writeTentativeInventory, copyRepo,
> -                                     readHashedPristineRoot, pris2inv
> +                                     readHashedPristineRoot, pris2inv, copySources
>                                     ) where
>  
>  import System.Directory ( createDirectoryIfMissing )
> hunk ./src/Darcs/Repository/HashedRepo.hs 293
>      createDirectoryIfMissing False (outr++"/"++darcsdir++"/inventories")
>      copyFileOrUrl opts (inr++"/"++darcsdir++"/hashed_inventory") (outr++"/"++darcsdir++"/hashed_inventory")
>                    Uncachable -- no need to copy anything but hashed_inventory!
> +    copySources repo inr
> +    debugMessage "Done copying hashed inventory."
> +
> +copySources :: RepoPatch p => Repository p C(r u t) -> String -> IO ()
> +copySources repo@(Repo outr _ _ _) inr = do
>      let repoCache = extractCache $ modifyCache repo dropGlobalCaches
>      appendBinFile (outr++"/"++darcsdir++"/prefs/sources") (show $ repo2cache inr `unionCaches` repoCache )
> hunk ./src/Darcs/Repository/HashedRepo.hs 300
> -    debugMessage "Done copying hashed inventory."
>    where
>      dropGlobalCaches (Ca cache) = Ca $ filter notGlobalCache cache
>      notGlobalCache xs = case xs of

Split off copySources from copyRepo. Makes sense. Does not change
copyRepo semantics.

So, there's some minor wibbling to do still, but other than that,
awesome. I guess nothing of the mentioned issues warrants amending -- if
you run into dependencies, just record new patch(es) on top. I will hold
off pushing this till Thursday evening -- at that point, unless I run
into bugs, I can push. Please try to address my comments by then -- if
not, I will take care of the most pressing ones and will expect you to
post followup patches to fix the rest.

Thanks!

Yours,
   Petr.


More information about the darcs-users mailing list