[darcs-users] How to look at older versions of a file

Alberto Bertogli albertogli at telpin.com.ar
Thu Dec 1 05:35:06 UTC 2005


On Wed, Nov 30, 2005 at 11:10:57PM +0000, Daniel Carrera wrote:
> Richard A. Smith wrote:
> >darcs unpull
> >
> >You can then select the patches you want to back out and the copy of the 
> >file you desire will be the version you are interested in.
> 
> What would be really nice is to output the contents of one specific file 
> as it was before patch xyz was applied. I think that's what Rob wants, 
> and it's certainly something I would want.
> 
> Is there a way to do this?

Yes. You can do "darcs annotate file -p patch" and parse the annotate
output. If you just want the file contents, write a script that parses the
XML output and shows you the "clean" file.

If you need sample code, you can find it in darcsweb's annotate parsing.


Oh, well, it's too tempting; the script is attached =P

darcs annotate --xml-output filename -p "Patch description" | ./ann2ascii

will do the trick.

I haven't tested it a lot, and it could be cleaner; but I guess it's useful
as an example.

Thanks,
		Alberto

-------------- next part --------------
#!/usr/bin/env python

# Set this to the encoding your repos are in.  fixu8() reads it to decode
# and encode strings, and to pick how its final result is encoded.

#encoding = 'latin1'
encoding = 'utf8'


#
# You shouldn't need to touch anything past here
#


import sys
import xml.sax

# I _hate_ this.
def fixu8(s):
	"""Return s as an encoded byte string, with escape markers expanded.

	A marker is "[_", a backslash, two hex digits and "_]"; each marker
	found is replaced by the single character whose code is that hex
	value.  Strings containing no "[_" at all take a shortcut and are
	simply re-encoded as UTF-8.

	s may be a unicode object or a byte string; byte strings are decoded
	with the module-level `encoding` in the shortcut path.

	NOTE(review): relies on the `unicode` builtin and on str.decode(), so
	this is Python 2 code.
	"""
	openpos = s.find('[_')
	if openpos < 0:
		# small optimization to avoid the conversion to utf8 and
		# entering the loop
		if type(s) == unicode:
			# workaround for python < 2.4
			return s.encode('utf8')
		else:
			return s.decode(encoding).encode('utf8')

	# from here on we work on a unicode object; openpos (computed on the
	# original string above) is reused as an index into the converted one
	s = s.encode(encoding).decode('raw_unicode_escape')
	while openpos >= 0:
		closepos = s.find('_]', openpos)
		if closepos < 0:
			# not closed, probably just luck
			break

		# middle should be something like 'c3', so we get it by
		# removing the first three characters ("[_\")
		middle = s[openpos + 3:closepos]
		if len(middle) == 2:
			# now we turn middle into the character "\xc3"
			char = chr(int(middle, 16))

			# finally, replace s with our new improved string, and
			# repeat the ugly procedure
			char = char.decode('raw_unicode_escape')
			# rebuild the full marker text and substitute only its
			# first occurrence in s
			mn = '[_\\' + middle + '_]'
			s = s.replace(mn, char, 1)
		# look for the next candidate marker after the current one
		openpos = s.find('[_', openpos + 1)

	# back to a byte string: utf8 for non-utf8 repos, raw_unicode_escape
	# otherwise
	# NOTE(review): presumably raw_unicode_escape is used in the utf8 case
	# so that the substituted characters come back out as the original raw
	# bytes -- confirm
	if encoding != 'utf8':
		s = s.encode('utf8')
	else:
		s = s.encode('raw_unicode_escape', 'replace')
	return s


class Annotate:
	"""Holds what parse_annotate() extracts for one file.

	Besides the per-file fields (name, creation info, last change info and
	the first/last dates), it keeps the list of Line objects in `lines` and
	a dictionary in `patches`.
	"""

	class Line:
		"""A single line of the file plus the patch fields attached to it."""

		def __init__(self):
			self.text = ""
			# patch hash, author and date; unset until filled in
			self.phash = self.pauthor = self.pdate = None

	def __init__(self):
		# every string field starts out empty
		self.fname = self.creator_hash = self.created_as = ""
		self.lastchange_hash = ""
		self.lastchange_author = ""
		self.lastchange_name = ""
		# every date field starts out unset
		self.lastchange_date = self.firstdate = self.lastdate = None
		self.lines = []
		self.patches = {}

	def write(self):
		"""Write the text of every line to stdout, in order.

		A line whose text is empty is written as a single newline.
		"""
		for entry in self.lines:
			sys.stdout.write(entry.text or '\n')


def parse_annotate(src):
	"""Build an Annotate object from the XML read from src.

	src is iterated (for instance a file object yielding lines); every
	piece goes through fixu8(), the pieces are concatenated and the result
	is parsed with xml.dom.minidom.

	The returned Annotate carries the attributes of the <file> and
	<created_as> elements, those of the first <patch> under <modified>,
	one Annotate.Line per <normal_line>/<added_line> child of <file>, a
	hash -> date dictionary in `patches`, and the first/last dates seen.
	"""
	from xml.dom import minidom

	result = Annotate()

	# FIXME: convert the source to UTF8; there _has_ to be a way to let
	# minidom know the source encoding
	xml_text = "".join([fixu8(piece) for piece in src])
	dom = minidom.parseString(xml_text)

	# file-level information
	file_node = dom.getElementsByTagName("file")[0]
	result.fname = fixu8(file_node.getAttribute("name"))

	created = dom.getElementsByTagName("created_as")[0]
	result.created_as = fixu8(created.getAttribute("original_name"))

	creator_patch = created.getElementsByTagName("patch")[0]
	result.creator_hash = fixu8(creator_patch.getAttribute("hash"))

	# the patch listed under <modified> describes the last change
	modified = dom.getElementsByTagName("modified")[0]
	last_patch = modified.getElementsByTagName("patch")[0]
	result.lastchange_hash = fixu8(last_patch.getAttribute("hash"))
	result.lastchange_author = fixu8(last_patch.getAttribute("author"))

	name_node = last_patch.getElementsByTagName("name")[0]
	result.lastchange_name = fixu8(name_node.childNodes[0].wholeText)

	last_date = last_patch.getAttribute("date")
	result.lastchange_date = last_date
	result.patches[result.lastchange_hash] = last_date

	# provisional bounds; the loop below replaces them with the real dates
	result.firstdate = last_date
	result.lastdate = 0

	wanted = ["normal_line", "added_line"]
	for node in file_node.childNodes:
		# only normal and added lines are of interest
		if node.nodeName not in wanted:
			continue

		if node.nodeName == "added_line":
			# added lines take their patch information from the
			# last change recorded above
			phash = result.lastchange_hash
			pauthor = result.lastchange_author
			pdate = result.lastchange_date
		else:
			patch = node.getElementsByTagName("patch")[0]
			phash = patch.getAttribute("hash")
			pauthor = patch.getAttribute("author")
			pdate = patch.getAttribute("date")

		# the line content is the concatenation of the direct text
		# children of the element
		pieces = [child.wholeText for child in node.childNodes
			if child.nodeType == child.TEXT_NODE]

		# drop all leading "\n"s: the way darcs formats its xml output
		# makes the DOM parser put them in front of the text
		text = "".join(pieces).lstrip("\n")

		entry = Annotate.Line()
		entry.text = fixu8(text)
		entry.phash = fixu8(phash)
		entry.pauthor = fixu8(pauthor)
		entry.pdate = pdate
		result.lines.append(entry)
		result.patches[entry.phash] = pdate

		# keep track of the date range covered by the lines
		if pdate < result.firstdate:
			result.firstdate = pdate
		if pdate > result.lastdate:
			result.lastdate = pdate

	return result



# Script entry point: parse the annotate XML arriving on stdin and write the
# resulting lines to stdout.  Guarded so that merely importing this module
# does not read stdin or produce output.
if __name__ == '__main__':
	ann = parse_annotate(sys.stdin)
	ann.write()



More information about the darcs-users mailing list