changeset 37:513449a88de2

Handle non-ASCII input correctly (assuming UTF-8 encoding).
- fastimport library now returns filenames as byte strings, so leave them be
- re-encode commit message as UTF-8
- monkeypatch mercurial.encoding to assume UTF-8 for everything
author Greg Ward <greg-hg@gerg.ca>
date Fri, 08 May 2009 11:03:16 -0400
parents 0e4e40caea58
children 3048a2dcf68a
files hgfastimport/hgimport.py
diffstat 1 files changed, 10 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/hgfastimport/hgimport.py	Tue May 05 21:04:06 2009 -0400
+++ b/hgfastimport/hgimport.py	Fri May 08 11:03:16 2009 -0400
@@ -138,11 +138,18 @@
         userinfo = cmd.author or cmd.committer
         user = "%s <%s>" % (userinfo[0], userinfo[1])
 
-        # XXX is this the right way to specify filename encoding?!?
-        files = [f.encode("utf-8") for f in commit_handler.filelist()]
+        # Blech: have to monkeypatch mercurial.encoding to ensure that
+        # everything under rawcommit() assumes the same encoding,
+        # regardless of current locale.
+        from mercurial import encoding
+        encoding.encoding = "UTF-8"
+
+        files = commit_handler.filelist()
+        assert type(cmd.message) is unicode
+        text = cmd.message.encode("utf-8") # XXX cmd.message is unicode
         date = self.convert_date(userinfo)
         node = self.repo.rawcommit(
-            files=files, text=cmd.message, user=user, date=date)
+            files=files, text=text, user=user, date=date)
         rev = self.repo.changelog.rev(node)
         if cmd.mark is not None:
             self.mark_map[":" + cmd.mark] = rev