-#!/usr/bin/python
+#!/usr/bin/python2.6
# -*- coding: utf-8 -*-
# Copyright (c) 2009 Alejandro R. Sedeño <asedeno@mit.edu>
# git svn keyword parsing, populating, and clearing.
from __future__ import with_statement
-import ConfigParser, errno, os, re, urllib
+import errno, os, re, urllib
+from ConfigParser import ConfigParser
+from optparse import OptionParser
+from fnmatch import fnmatch
import git
-VERSION = 0
+# Version string of this script. It is written to the 'core' section of
+# conf.ini by parse_svn_unhandled() and compared by conf_right_version().
+VERSION = "0.9.1"
+
+# Where we keep data in the repo.
+# Returns the path of the 'svn_keywords' directory located under g.path,
+# where g is the repository object created in the __main__ section.
+def gsk(g):
+ return os.path.join(g.path, 'svn_keywords')
+
+#Configuration Data
+# CONFIG:   'core' section (version), one section per unhandled.log path
+#           (lastrev), plus the BlobToCommit / CommitToRev / RevInfo caches
+#           filled in by get_path_info().
+# FILES:    one section per file path; each option is an svn revision
+#           number whose value is the set of svn:keywords recorded for it.
+# FILEINFO: only read in smudge(); no other use is visible in this file.
+CONFIG = ConfigParser()
+FILES = ConfigParser()
+FILEINFO = ConfigParser()
+
+# Placeholders; the real locations are assigned in the __main__ section
+# once the repository has been found.
+CONFIG_PATH = ''
+FILES_PATH = ''
+FILEINFO_PATH = ''
+
+
+# Valid keywords:
+# Maps each canonical keyword name (the key, also used to index info_dict
+# in smudge()) to the list of spellings accepted for it in file contents.
+# NOTE(review): Subversion documents the Date alias as 'LastChangedDate';
+# 'LastDateChanged' below looks like a transposition -- confirm.
+svn_keywords = {'Date': ['Date', 'LastDateChanged'],
+ 'Revision': ['Revision', 'LastChangedRevision', 'Rev'],
+ 'Author': ['Author','LastChangedBy'],
+ 'URL': ['HeadURL', 'URL'],
+ 'Id': ['Id']
+ }
+
+# Regular expressions we'll be using to smudge/clean; created as
+# needed and cached.
+svn_keywords_re = {}
+# Return the compiled pattern for canonical keyword s (a key of
+# svn_keywords).  The pattern matches '$<spelling>...$': group 1 is the
+# spelling that matched, followed by any run of non-'$' characters up to
+# the closing '$'.  Compiled patterns are memoized in svn_keywords_re.
+# NOTE(review): raising a plain string is not a valid exception on
+# Python 2.6 (it produces a TypeError instead); an Exception subclass such
+# as ValueError is probably what is intended here.
+def get_svn_keyword_re(s):
+ if not s in svn_keywords:
+ raise 'Invalid SVN Keyword'
+ if not s in svn_keywords_re:
+ svn_keywords_re[s] = re.compile('\$(' + ('|'.join(svn_keywords[s])) + ')[^$]*\$')
+ return svn_keywords_re[s]
+
+# Return True when the 'version' option stored in CONFIG's 'core' section
+# equals VERSION.  When the option is missing, ver stays at -1 and the
+# comparison is False.
+def conf_right_version():
+ ver = -1
+ if CONFIG.has_option('core', 'version'):
+ ver = CONFIG.get('core', 'version')
+ return ver == VERSION
+
+# Load the previously saved per-file keyword data from FILES_PATH into
+# FILES, but only when the stored config version matches VERSION.
+def read_file_data():
+ if conf_right_version():
+ FILES.read(FILES_PATH)
+
+# Return the 'lastrev' value stored in CONFIG for the section named path,
+# as an int, or None when the config version does not match, the option is
+# absent, or the stored value is not an integer.
+# Side effect: a CONFIG section named path is created if it is missing.
+def get_last_rev(path):
+ if not CONFIG.has_section(path):
+ CONFIG.add_section(path)
+
+ lastrev = None
+ if conf_right_version() and CONFIG.has_option(path, 'lastrev'):
+ try:
+ lastrev = CONFIG.getint(path, 'lastrev')
+ except ValueError:
+ lastrev = None
+ return lastrev
+
+
+# Parse the unhandled log.
+# Walks directory recursively.  Every regular file named 'unhandled.log'
+# is scanned for 'r<N>' revision lines and svn:keywords 'file_prop' lines;
+# the keywords found are stored in FILES under the file's path, keyed by
+# revision, and the newest revision seen is stored in CONFIG as 'lastrev'
+# in the section named after the log file's path.
+# NOTE(review): g is not a parameter; this relies on the module-level g
+# assigned in the __main__ section.
+def _do_parse_unhandled(directory):
+ base = os.path.join(directory)
+ for d in os.listdir(base):
+ subent = os.path.join(base, d)
+ if (d == 'unhandled.log' and os.path.isfile(subent)):
+ rev = None
+ # Text before the first ':' of the svn-remote.svn.fetch setting; logged
+ # paths are made relative to it below.
+ strip_prefix = g.git.config('--get','svn-remote.svn.fetch').split(':')[0]
+ lastrev = get_last_rev(subent)
+ with open(subent, 'r') as f:
+ # Compile the regular expressions we'll be using here.
+ re_rev = re.compile("^r(\d+)$")
+ re_keywords = re.compile("^\s+[-+]file_prop: (\S+) svn:keywords ?(\S*)$")
+
+ for line in f:
+ m = re_rev.match(line)
+ if m:
+ rev = m.group(1)
+ continue
+
+ # Skip entries for revisions at or below the stored lastrev.
+ # NOTE(review): rev is still None until an 'r<N>' line has matched, and
+ # int(None) raises TypeError -- confirm logs always start with one.
+ if (lastrev >= int(rev)):
+ continue
+
+ m = re_keywords.match(line)
+ if m:
+ # Both the path and the keyword list are URL-quoted in the log.
+ path = urllib.unquote(m.group(1))
+ path = os.path.relpath(path, strip_prefix)
+ keywords = set(urllib.unquote(m.group(2)).split(' '))
+ if not FILES.has_section(path):
+ FILES.add_section(path)
+ # The value stored is a set object; smudge() later eval()s the value
+ # it reads back from files.ini.
+ FILES.set(path, rev, keywords)
+ if rev:
+ lastrev = max(int(rev), lastrev)
+ CONFIG.set(subent, 'lastrev', lastrev)
+ elif (os.path.isdir(subent)):
+ _do_parse_unhandled(subent)
+# Ensure the svn_keywords data directory exists, parse every unhandled.log
+# found under <g.path>/svn, record VERSION in CONFIG's 'core' section, and
+# write FILES and CONFIG out to FILES_PATH and CONFIG_PATH.
def parse_svn_unhandled(g):
- gsk = g.path + '/svn_keywords'
try:
- os.mkdir(gsk)
+ os.mkdir(gsk(g))
except os.error, e:
+ # An already existing directory is fine; any other error is re-raised.
if e.errno != errno.EEXIST:
raise
- config = ConfigParser.ConfigParser()
- config_path = gsk + '/conf.ini'
- config.read(config_path)
+ _do_parse_unhandled(os.path.join(g.path, 'svn'))
+ CONFIG.set('core', 'version', VERSION)
- files = ConfigParser.ConfigParser()
- files_path = gsk + '/files.ini'
+ with open(FILES_PATH, 'wb') as f:
+ FILES.write(f)
- if not config.has_section('core'):
- config.add_section('core')
- if config.has_option('core', 'version'):
- ver = config.getint('core', 'ver')
+ with open(CONFIG_PATH, 'wb') as f:
+ CONFIG.write(f)
- lastrev = None
- if ver == VERSION:
- files.read(files_path)
- if config.has_option('core', 'lastrev'):
- lastrev = config.getint('core', 'lastrev')
+# Return a dict describing the last svn change of path: 'Revision' always,
+# plus 'Date' and 'Author' when the corresponding 'Last Changed ...' lines
+# appear in `git svn info` output (or were cached earlier).
+# Lookups are cached in CONFIG (BlobToCommit, CommitToRev, RevInfo);
+# CONFIG is written to CONFIG_PATH whenever a new cache entry was added.
+def get_path_info(g, path):
+ write_config = False
- with open(g.path + '/svn/git-svn/unhandled.log', 'r') as f:
- # Compile the regular expressions we'll be using here.
- re_rev = re.compile("^r(\d+)$")
- re_keywords = re.compile("^\s+[-+]file_prop: (\S+) svn:keywords ?(\S*)$")
+ # parse ls-tree output and get a blob id for path
+ blob = g.git.ls_tree('HEAD', path).split(' ')[2].split("\t")[0]
- rev = None
- for line in f:
- m = re_rev.match(line)
- if m:
- rev = m.group(1)
- continue
+ # translate that to a commit id
+ if not CONFIG.has_option('BlobToCommit', blob):
+ CONFIG.set('BlobToCommit', blob, g.commits('HEAD', path, 1)[0].id)
+ write_config = True
+ commit = CONFIG.get('BlobToCommit', blob)
- if (lastrev >= int(rev)):
- continue
+ # translate that into an svn revision id
+ if not CONFIG.has_option('CommitToRev', commit):
+ CONFIG.set('CommitToRev',commit,g.git.svn('find-rev', commit))
+ write_config = True
+ file_rev = CONFIG.get('CommitToRev', commit)
- m = re_keywords.match(line)
- if m:
- path = urllib.unquote(m.group(1))
- keywords = set(urllib.unquote(m.group(2)).split(' '))
- if not files.has_section(path):
- files.add_section(path)
- files.set(path, rev, keywords)
+ # get information about that revision
+ info_dict = {}
+ if not CONFIG.has_option('RevInfo', file_rev):
+ # NOTE(review): a line without ': ' makes the two-name unpacking below
+ # raise ValueError -- confirm `git svn info` never emits such a line.
+ for line in g.git.svn('info', path).split("\n"):
+ k, v = line.split(": ", 1)
+ if k == 'Last Changed Date':
+ info_dict['Date'] = v
+ elif k == 'Last Changed Author':
+ info_dict['Author'] = v
+ CONFIG.set('RevInfo', file_rev, info_dict)
+ write_config = True
+ else:
+ # The cached value is a dict when it was set during this run and a
+ # string when it was read back from conf.ini.
+ # NOTE(review): eval() executes whatever text conf.ini contains.
+ info = CONFIG.get('RevInfo', file_rev)
+ info_dict.update(info if type(info) is dict else eval(info))
- lastrev = max(int(rev), lastrev)
- config.set('core', 'lastrev', lastrev)
- config.set('core', 'version', VERSION)
+ if write_config:
+ with open(CONFIG_PATH, 'wb') as f:
+ CONFIG.write(f)
- with open(files_path, 'wb') as f:
- files.write(f)
+ info_dict['Revision'] = file_rev
+ return info_dict
- with open(config_path, 'wb') as f:
- config.write(f)
+# Return, as an int, the svn revision reported by `git svn find-rev` for
+# the nearest commit at or before treeish (treeish~parent, treeish~(parent+1),
+# ...) that has one.
+# NOTE(review): uses the module-level g assigned in the __main__ section.
+# NOTE(review): the recursion has no explicit stop condition; presumably it
+# ends with an error from git once the history is exhausted -- confirm.
+def find_last_svn_rev(treeish, parent=0):
+ svnRev = g.git.svn('find-rev', "%s~%i" % (treeish, parent))
+ if svnRev:
+ return int(svnRev)
+ else:
+ return find_last_svn_rev(treeish, parent+1)
-def smudge(g):
- return 0
+# Do the work.
+# Refreshes the keyword data from the unhandled logs, then rewrites the
+# svn keywords in the working-tree files listed in FILES.
+#   g       -- repository object (provides .git and .wd).
+#   options -- parsed command-line options; options.clean selects between
+#              collapsing keywords to '$Keyword$' (true) and expanding them
+#              to '$Keyword: value $' (false); options.verbose prints one
+#              summary line per rewritten file.
+def smudge(g, options):
+ read_file_data()
+ parse_svn_unhandled(g)
+ rev_head = find_last_svn_rev('HEAD')
+ url_base = g.git.svn('info', '--url')
+
+ FILES.read(FILES_PATH)
+ FILEINFO.read(FILEINFO_PATH)
+
+ # Collect the non-comment lines of .git/info/exclude as fnmatch patterns.
+ ignores = []
+ with open(os.path.join(g.wd,'.git','info','exclude')) as f:
+ for line in f:
+ if line[0] != '#':
+ ignores.append(line.rstrip('\n'))
+
+ paths = FILES.sections()
+ paths.sort()
+ for path in paths:
+ if not os.path.exists(path):
+ continue
+
+ ignore = False
+ for i in ignores:
+ if fnmatch(path, i):
+ ignore = True
+ if ignore:
+ continue
+ # Newest recorded keyword revision that is not newer than HEAD's svn
+ # revision.  max() of an empty list (or a non-numeric option name)
+ # raises ValueError, in which case the path is skipped.
+ try:
+ kw_rev = max(filter(lambda x: x <= rev_head, map(int, FILES.options(path))))
+ except ValueError:
+ continue
+
+ info_dict = {}
+ if not options.clean:
+ info_dict.update(get_path_info(g, path))
+ info_dict['URL'] = '/'.join([url_base, path])
+ info_dict['Name'] = os.path.basename(path)
+ # NOTE(review): kw_rev is an int while info_dict['Revision'] appears to
+ # be a string; Python 2 orders ints before strings, so max() would
+ # always pick the string -- confirm whether int() was intended.
+ info_dict['Revision'] = str(max(kw_rev, info_dict['Revision']))
+
+ buf = ''
+ with open(os.path.join(g.wd, path), 'r') as f:
+ buf = f.read()
+
+ # NOTE(review): eval() executes whatever text files.ini contains.
+ keywords = eval(FILES.get(path, str(kw_rev)))
+ for k in keywords:
+ for sk in svn_keywords:
+ if k in svn_keywords[sk]:
+ if options.clean:
+ buf = re.sub(get_svn_keyword_re(sk), '$\\1$', buf)
+ elif sk == 'Id':
+ # NOTE(review): 'Date' and 'Author' are only present when
+ # get_path_info() found them; a missing key raises KeyError here.
+ id_str = ' '.join([info_dict['Name'],
+ info_dict['Revision'],
+ info_dict['Date'],
+ info_dict['Author']])
+ buf = re.sub(get_svn_keyword_re(sk), '$\\1: ' + id_str + ' $', buf)
+ else:
+ buf = re.sub(get_svn_keyword_re(sk), '$\\1: ' + info_dict[sk] + ' $', buf)
-def clean(g):
- return 0
+ with open(os.path.join(g.wd, path), 'w') as f:
+ f.write(buf)
+ if options.verbose:
+ print path + ' [' + ', '.join(keywords) + '] [len: ' + str(len(buf)) +']'
+# Script entry point: parse the command line, locate the repository, set
+# the module-level *_PATH globals, load conf.ini and run smudge().
if __name__ == '__main__':
- try:
- g = git.Repo()
- except git.errors.InvalidGitRepositoryError:
- print "You are not in a git repository or working directory."
- exit(1)
- parse_svn_unhandled(g)
+
+ parser = OptionParser(version="%prog "+str(VERSION))
+ # clean stays None unless -s (False) or -c (True) is given; None means
+ # "no mode chosen" and only prints the help text.
+ parser.set_defaults(clean=None)
+ parser.add_option("-s", "--smudge",
+ action="store_false", dest="clean",
+ help="Populate svn:keywords.")
+ parser.add_option("-c", "--clean",
+ action="store_true", dest="clean",
+ help="Return svn:keywords to pristene state.")
+ parser.add_option("-v", "--verbose",
+ action="store_true", dest="verbose", default=False)
+ (options, args) = parser.parse_args()
+
+ if (options.clean is None):
+ parser.print_help()
+ exit(0)
+ else:
+ # g is assigned at module level here; _do_parse_unhandled() and
+ # find_last_svn_rev() read it as a global.
+ try:
+ g = git.Repo()
+ except git.errors.InvalidGitRepositoryError:
+ print "You are not in a git repository or working directory."
+ exit(1)
+
+ if g.bare:
+ print "This appears to be a bare git repository."
+ exit(1)
+
+ # smudge() tests the FILES section names with os.path.exists(), so
+ # run from the working directory root.
+ os.chdir(g.wd)
+
+ CONFIG_PATH = os.path.join(gsk(g), 'conf.ini')
+ FILES_PATH = os.path.join(gsk(g), 'files.ini')
+ FILEINFO_PATH = os.path.join(gsk(g), 'fileinfo.ini')
+
+ # Make sure every section the functions above call CONFIG.set() on
+ # exists, even when conf.ini is missing or incomplete.
+ CONFIG.read(CONFIG_PATH)
+ for section in ['core','CommitToRev','BlobToCommit', 'RevInfo']:
+ if not CONFIG.has_section(section):
+ CONFIG.add_section(section)
+
+ # Both modes go through smudge(); options.clean picks the direction.
+ smudge(g, options)