]> asedeno.scripts.mit.edu Git - git-svn-keywords.git/blobdiff - git-svn-keywords.py
Detect bare repositories and path in working directory
[git-svn-keywords.git] / git-svn-keywords.py
index 102e3e7b9ddc4a329d818c9aa8fac77fe00f9749..4abf3b76c6ae97c75e2ede359f5f51c05d1cd461 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2.6
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2009 Alejandro R. Sedeño <asedeno@mit.edu>
 # git svn keyword parsing, populating, and clearing.
 
 from __future__ import with_statement
-import ConfigParser, errno, os, re, urllib
+import errno, os, re, urllib
+from ConfigParser import ConfigParser
+from optparse import OptionParser
+from fnmatch import fnmatch
 import git
 
-VERSION = 0
+# Version string: shown by --version, written to [core] version in
+# CONFIG, and compared against the stored value by conf_right_version().
+VERSION = "0.9.1"
+
+# Location of this tool's data directory inside the repository.
+def gsk(g):
+    """Return the path of the 'svn_keywords' directory below g.path."""
+    keyword_dir = os.path.join(g.path, 'svn_keywords')
+    return keyword_dir
+
+# Configuration data.
+# Module-level ConfigParser instances shared by the functions below:
+# CONFIG holds [core] version plus the lookup caches, FILES holds the
+# per-path svn:keywords history, FILEINFO is only read in smudge().
+CONFIG = ConfigParser()
+FILES = ConfigParser()
+FILEINFO = ConfigParser()
+
+# On-disk locations of the three stores; empty until the __main__
+# block derives them from gsk(g).
+CONFIG_PATH = ''
+FILES_PATH = ''
+FILEINFO_PATH = ''
+
+
+# Valid keywords: canonical name -> every spelling accepted for it in
+# an svn:keywords property.  The long alias of Date is
+# 'LastChangedDate' (the previous 'LastDateChanged' is not a keyword
+# Subversion knows, so such files were never expanded).
+svn_keywords = {'Date': ['Date', 'LastChangedDate'],
+                'Revision': ['Revision', 'LastChangedRevision', 'Rev'],
+                'Author': ['Author', 'LastChangedBy'],
+                'URL': ['HeadURL', 'URL'],
+                'Id': ['Id'],
+                }
+
+# Regular expressions we'll be using to smudge/clean; created as
+# needed and cached.
+svn_keywords_re = {}
+def get_svn_keyword_re(s):
+    """Return the compiled pattern matching keyword anchors for s.
+
+    s is a canonical keyword name (a key of svn_keywords).  The pattern
+    matches '$Name$' and '$Name: anything $' for every alias of s, and
+    group 1 is the alias that was actually written in the file.
+
+    Raises ValueError when s is not a key of svn_keywords.
+    """
+    if s not in svn_keywords:
+        # A bare string cannot be raised on Python 2.6 (it turns into a
+        # TypeError), so raise a real exception type.
+        raise ValueError('Invalid SVN Keyword')
+    if s not in svn_keywords_re:
+        names = '|'.join(svn_keywords[s])
+        # Only accept the anchor forms: the name followed directly by
+        # '$', or by ':' and a value on the same line.  This keeps
+        # '$Idea$' or '$Id ... <next line> ... $' from being rewritten.
+        svn_keywords_re[s] = re.compile(r'\$(' + names + r')(?::[^$\n]*)?\$')
+    return svn_keywords_re[s]
+
+def conf_right_version():
+    """Tell whether CONFIG's [core] version equals VERSION.
+
+    A missing option counts as version -1.
+    """
+    stored = CONFIG.get('core', 'version') if CONFIG.has_option('core', 'version') else -1
+    return stored == VERSION
+
+def read_file_data():
+    """Load FILES from FILES_PATH when the stored config version matches."""
+    if not conf_right_version():
+        return
+    FILES.read(FILES_PATH)
+
+def get_last_rev(path):
+    """Return the 'lastrev' integer stored in CONFIG for path, or None.
+
+    The [path] section is created as a side effect when it is missing.
+    None is returned when the config version does not match, when no
+    'lastrev' option exists, or when its value is not an integer.
+    """
+    if not CONFIG.has_section(path):
+        CONFIG.add_section(path)
+
+    usable = conf_right_version() and CONFIG.has_option(path, 'lastrev')
+    if not usable:
+        return None
+    try:
+        return CONFIG.getint(path, 'lastrev')
+    except ValueError:
+        return None
+
+
+# Parse the unhandled log.
+def _do_parse_unhandled(directory):
+    """Walk directory recursively and record svn:keywords changes.
+
+    Every regular file named 'unhandled.log' below directory is scanned.
+    For each 'file_prop ... svn:keywords' entry that belongs to a
+    revision newer than the 'lastrev' remembered for that log, the
+    keyword set is stored in FILES under section [path], option <rev>.
+    Afterwards the newest revision seen is stored in CONFIG as that
+    log's 'lastrev'.
+
+    Reads the module-level repository object g (bound in the __main__
+    block) to look up the svn fetch prefix.
+    """
+    # Compile the regular expressions we'll be using here.
+    re_rev = re.compile(r"^r(\d+)$")
+    re_keywords = re.compile(r"^\s+[-+]file_prop: (\S+) svn:keywords ?(\S*)$")
+
+    for d in os.listdir(directory):
+        subent = os.path.join(directory, d)
+        if d == 'unhandled.log' and os.path.isfile(subent):
+            rev = None
+            strip_prefix = g.git.config('--get','svn-remote.svn.fetch').split(':')[0]
+            lastrev = get_last_rev(subent)
+            with open(subent, 'r') as f:
+                for line in f:
+                    m = re_rev.match(line)
+                    if m:
+                        rev = m.group(1)
+                        continue
+
+                    # Lines ahead of the first rN header belong to no
+                    # revision; int(None) used to raise TypeError here.
+                    if rev is None:
+                        continue
+
+                    # Already processed by an earlier run.
+                    if lastrev is not None and lastrev >= int(rev):
+                        continue
+
+                    m = re_keywords.match(line)
+                    if m:
+                        path = urllib.unquote(m.group(1))
+                        path = os.path.relpath(path, strip_prefix)
+                        keywords = set(urllib.unquote(m.group(2)).split(' '))
+                        if not FILES.has_section(path):
+                            FILES.add_section(path)
+                        FILES.set(path, rev, keywords)
+            if rev:
+                newest = int(rev)
+                # Explicit None check instead of relying on Python 2's
+                # ordering of None below every integer.
+                if lastrev is None or newest > lastrev:
+                    lastrev = newest
+                CONFIG.set(subent, 'lastrev', lastrev)
+        elif os.path.isdir(subent):
+            _do_parse_unhandled(subent)
 
 def parse_svn_unhandled(g):
+    """Refresh keyword data from the unhandled logs and save it.
+
+    Creates the gsk(g) directory if needed, scans everything below
+    <g.path>/svn via _do_parse_unhandled(), stamps CONFIG with the
+    current VERSION and writes FILES and CONFIG to their files.
+    """
-    gsk = g.path + '/svn_keywords'
     try:
-        os.mkdir(gsk)
+        os.mkdir(gsk(g))
     except os.error, e:
+        # An already existing directory is fine; anything else is not.
         if e.errno != errno.EEXIST:
             raise
 
-    config = ConfigParser.ConfigParser()
-    config_path = gsk + '/conf.ini'
-    config.read(config_path)
+    _do_parse_unhandled(os.path.join(g.path, 'svn'))
+    CONFIG.set('core', 'version', VERSION)
 
-    files = ConfigParser.ConfigParser()
-    files_path = gsk + '/files.ini'
+    with open(FILES_PATH, 'wb') as f:
+        FILES.write(f)
 
-    if not config.has_section('core'):
-        config.add_section('core')
-    if config.has_option('core', 'version'):
-        ver = config.getint('core', 'ver')
+    with open(CONFIG_PATH, 'wb') as f:
+        CONFIG.write(f)
 
-    lastrev = None
-    if ver == VERSION:
-        files.read(files_path)
-        if config.has_option('core', 'lastrev'):
-            lastrev = config.getint('core', 'lastrev')
+def get_path_info(g, path):
+    """Return a dict describing the last svn change of path.
+
+    The result always has 'Revision' and carries 'Date' / 'Author' when
+    the matching lines were found in 'git svn info' output (or in the
+    cached entry).  Lookups are cached in CONFIG under [BlobToCommit],
+    [CommitToRev] and [RevInfo]; CONFIG is written to CONFIG_PATH
+    whenever a new cache entry was added.
+    """
+    write_config = False
 
-    with open(g.path + '/svn/git-svn/unhandled.log', 'r') as f:
-        # Compile the regular expressions we'll be using here.
-        re_rev = re.compile("^r(\d+)$")
-        re_keywords = re.compile("^\s+[-+]file_prop: (\S+) svn:keywords ?(\S*)$")
+    # parse ls-tree output and get a blob id for path
+    blob = g.git.ls_tree('HEAD', path).split(' ')[2].split("\t")[0]
 
-        rev = None
-        for line in f:
-            m = re_rev.match(line)
-            if m:
-                rev = m.group(1)
-                continue
+    # translate that to a commit id
+    if not CONFIG.has_option('BlobToCommit', blob):
+        CONFIG.set('BlobToCommit', blob, g.commits('HEAD', path, 1)[0].id)
+        write_config = True
+    commit = CONFIG.get('BlobToCommit', blob)
 
-            if (lastrev >= int(rev)):
-                continue
+    # translate that into an svn revision id
+    if not CONFIG.has_option('CommitToRev', commit):
+        CONFIG.set('CommitToRev',commit,g.git.svn('find-rev', commit))
+        write_config = True
+    file_rev = CONFIG.get('CommitToRev', commit)
 
-            m = re_keywords.match(line)
-            if m:
-                path = urllib.unquote(m.group(1))
-                keywords = set(urllib.unquote(m.group(2)).split(' '))
-                if not files.has_section(path):
-                    files.add_section(path)
-                files.set(path, rev, keywords)
+    # get information about that revision
+    info_dict = {}
+    if not CONFIG.has_option('RevInfo', file_rev):
+        for line in g.git.svn('info', path).split("\n"):
+            # NOTE(review): a line lacking ': ' makes this unpacking
+            # raise ValueError -- confirm 'git svn info' never emits one.
+            k, v = line.split(": ", 1)
+            if k == 'Last Changed Date':
+                info_dict['Date'] = v
+            elif k == 'Last Changed Author':
+                info_dict['Author'] = v
+        CONFIG.set('RevInfo', file_rev, info_dict)
+        write_config = True
+    else:
+        info = CONFIG.get('RevInfo', file_rev)
+        # The value is a dict when it was set earlier in this process
+        # and a string when it came from the file on disk.
+        # NOTE(review): eval() executes whatever the config file holds.
+        info_dict.update(info if type(info) is dict else eval(info))
 
-        lastrev = max(int(rev), lastrev)
-        config.set('core', 'lastrev', lastrev)
-        config.set('core', 'version', VERSION)
+    if write_config:
+        with open(CONFIG_PATH, 'wb') as f:
+            CONFIG.write(f)
 
-    with open(files_path, 'wb') as f:
-        files.write(f)
+    info_dict['Revision'] = file_rev
+    return info_dict
 
-    with open(config_path, 'wb') as f:
-        config.write(f)
+def find_last_svn_rev(treeish, parent=0):
+    """Return the svn revision of the nearest ancestor that has one.
+
+    Starting at treeish~parent, ask 'git svn find-rev' about each
+    ancestor in turn and return the first non-empty answer as an int.
+    Uses the module-level repository object g.
+
+    Iterates instead of recursing so that a long run of commits with no
+    svn revision cannot exhaust the interpreter's recursion limit.
+    """
+    while True:
+        svnRev = g.git.svn('find-rev', "%s~%i" % (treeish, parent))
+        if svnRev:
+            return int(svnRev)
+        parent += 1
 
-def smudge(g):
-    return 0
+# Do the work.
+def smudge(g, options):
+    """Expand, or with options.clean collapse, svn keywords in the tree.
+
+    For every path recorded in FILES that exists in the working
+    directory and is not matched by a pattern in .git/info/exclude, the
+    keyword set in force at the newest svn revision reachable from HEAD
+    is looked up and each keyword anchor in the file is rewritten:
+    '$Kw$' when options.clean is true, '$Kw: value $' otherwise.
+    With options.verbose one line is printed per processed file.
+    """
+    def expander(value):
+        # Build a re.sub callback.  Using a callback (not a template
+        # string) keeps backslashes in value from being interpreted.
+        if value is None:
+            return lambda m: '$' + m.group(1) + '$'
+        return lambda m: '$' + m.group(1) + ': ' + value + ' $'
+
+    read_file_data()
+    parse_svn_unhandled(g)
+    rev_head = find_last_svn_rev('HEAD')
+    url_base = g.git.svn('info', '--url')
+
+    # Not redundant: keyword sets added by parse_svn_unhandled() live in
+    # memory as set objects; re-reading the file it just wrote turns
+    # them back into the strings eval() below expects.
+    FILES.read(FILES_PATH)
+    FILEINFO.read(FILEINFO_PATH)
+
+    ignores = []
+    with open(os.path.join(g.wd,'.git','info','exclude')) as f:
+        for line in f:
+            if line[0] != '#':
+                ignores.append(line.rstrip('\n'))
+
+    for path in sorted(FILES.sections()):
+        if not os.path.exists(path):
+            continue
+        if any(fnmatch(path, i) for i in ignores):
+            continue
+        try:
+            kw_rev = max(filter(lambda x: x <= rev_head, map(int, FILES.options(path))))
+        except ValueError:
+            # No usable revision recorded for this path.
+            continue
+
+        info_dict = {}
+        if not options.clean:
+            info_dict.update(get_path_info(g, path))
+            info_dict['URL'] = '/'.join([url_base, path])
+            info_dict['Name'] = os.path.basename(path)
+            # get_path_info() hands back the revision as a string.
+            # Compare numerically: Python 2 orders any str above any
+            # int, so the old max() could never pick kw_rev.
+            try:
+                file_rev = int(info_dict['Revision'])
+            except (TypeError, ValueError):
+                file_rev = kw_rev
+            info_dict['Revision'] = str(max(kw_rev, file_rev))
+
+        with open(os.path.join(g.wd, path), 'r') as f:
+            buf = f.read()
+
+        # NOTE(review): eval() runs whatever text files.ini holds for
+        # this option; acceptable only while that file is trusted.
+        keywords = eval(FILES.get(path, str(kw_rev)))
+        for k in keywords:
+            for sk in svn_keywords:
+                if k not in svn_keywords[sk]:
+                    continue
+                if options.clean:
+                    value = None
+                elif sk == 'Id':
+                    value = ' '.join([info_dict['Name'],
+                                      info_dict['Revision'],
+                                      info_dict['Date'],
+                                      info_dict['Author']])
+                else:
+                    value = info_dict[sk]
+                buf = get_svn_keyword_re(sk).sub(expander(value), buf)
 
-def clean(g):
-    return 0
+        with open(os.path.join(g.wd, path), 'w') as f:
+            f.write(buf)
+        if options.verbose:
+            print path + ' [' + ', '.join(keywords) + '] [len: ' + str(len(buf)) +']'
 
 if __name__ == '__main__':
-    try:
-        g = git.Repo()
-    except git.errors.InvalidGitRepositoryError:
-        print "You are not in a git repository or working directory."
-        exit(1)
-    parse_svn_unhandled(g)
+
+    # --smudge and --clean both write to options.clean (False / True);
+    # the None default means neither was given.
+    parser = OptionParser(version="%prog "+str(VERSION))
+    parser.set_defaults(clean=None)
+    parser.add_option("-s", "--smudge",
+                      action="store_false", dest="clean",
+                      help="Populate svn:keywords.")
+    parser.add_option("-c", "--clean",
+                      action="store_true", dest="clean",
+                      help="Return svn:keywords to pristene state.")
+    parser.add_option("-v", "--verbose",
+                      action="store_true", dest="verbose", default=False)
+    (options, args) = parser.parse_args()
+
+    if (options.clean is None):
+        # No mode selected: show usage and stop.
+        parser.print_help()
+        exit(0)
+    else:
+        # g is bound at module level on purpose: _do_parse_unhandled()
+        # and find_last_svn_rev() read it as a global.
+        try:
+            g = git.Repo()
+        except git.errors.InvalidGitRepositoryError:
+            print "You are not in a git repository or working directory."
+            exit(1)
+
+        if g.bare:
+            print "This appears to be a bare git repository."
+            exit(1)
+
+        # smudge() tests paths from FILES with os.path.exists(), so run
+        # from the top of the working directory.
+        os.chdir(g.wd)
+
+        CONFIG_PATH = os.path.join(gsk(g), 'conf.ini')
+        FILES_PATH = os.path.join(gsk(g), 'files.ini')
+        FILEINFO_PATH = os.path.join(gsk(g), 'fileinfo.ini')
+
+        # Load the saved configuration and make sure every section the
+        # functions above write to with CONFIG.set() exists.
+        CONFIG.read(CONFIG_PATH)
+        for section in ['core','CommitToRev','BlobToCommit', 'RevInfo']:
+            if not CONFIG.has_section(section):
+                CONFIG.add_section(section)
+
+        smudge(g, options)