Changes to 3 files · Browse files at 8de34e3d1f19 Showing diff from parent d0a63d31eac3 Diff from another changeset...
|
@@ -1,5 +1,6 @@ # object_store.py -- Object store for git objects
-# Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
+# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@samba.org>
+# and others
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@@ -220,6 +221,28 @@ obj = self[sha]
return obj
+ def _collect_ancestors(self, heads, common = set()):
+ """Collect all ancestors of heads up to (excluding) those in common
+ :param heads: commits to start from
+ :param common: commits to end at, or empty set to walk repository completely
+ :return: a tuple (A, B) where A - all commits reachable
+ from heads but not present in common, B - common (shared) elements
+ that are directly reachable from heads
+ """
+ bases = set()
+ commits = set()
+ queue = []
+ queue.extend(heads)
+ while queue:
+ e = queue.pop(0)
+ if e in common:
+ bases.add(e)
+ elif e not in commits:
+ commits.add(e)
+ cmt = self[e]
+ queue.extend(cmt.parents)
+ return (commits, bases)
+
class PackBasedObjectStore(BaseObjectStore):
@@ -792,6 +815,52 @@ raise NotTreeError(root_sha)
return tree.lookup_path(lookup_obj, path)
+def _collect_filetree_revs(obj_store, tree_sha, kset):
+ """Collect SHA1s of files and directories for specified tree
+ (identified by SHA1)
+ :param obj_store: Object store to get objects by SHA from
+ :param tree_sha: tree reference to walk
+ :param kset: set to fill with references to files and directories
+ """
+ filetree = obj_store[tree_sha]
+ for name,mode,sha in filetree.iteritems():
+ if not S_ISGITLINK(mode) and sha not in kset:
+ kset.add(sha)
+ if stat.S_ISDIR(mode):
+ _collect_filetree_revs(obj_store, sha, kset)
+
+def _split_commits_and_tags(obj_store, lst, ignore_unknown = False):
+ """Split lst into two lists, one with commit SHA1s, another with
+ tag SHA1s. Commits referenced by tags are included into commits
+ list as well. Only SHA1s known in this repository will get
+ through, and unless ignore_unknown argument is True, KeyError
+ is thrown for SHA1 missing in the repository
+ :param obj_store: Object store to get objects by SHA1 from
+ :param lst: Collection of commit and tag SHAs
+ :param ignore_unknown: True to skip SHA1 missing in the
+ repository silently.
+ :return: A tuple of (commits, tags) SHA1s
+ """
+ commits = set()
+ tags = set()
+ for e in lst:
+ try:
+ o = obj_store[e]
+ except KeyError:
+ if ignore_unknown:
+ pass
+ else:
+ raise
+ else:
+ if isinstance(o, Commit):
+ commits.add(e)
+ elif isinstance(o, Tag):
+ tags.add(e)
+ commits.add(o.object[1])
+ else:
+ raise KeyError('Not a commit or a tag: %s' % e)
+ return (commits, tags)
+
class MissingObjectFinder(object):
"""Find the objects missing from another object store.
@@ -808,11 +877,44 @@
def __init__(self, object_store, haves, wants, progress=None,
get_tagged=None):
- haves = set(haves)
- self.sha_done = haves
- self.objects_to_send = set([(w, None, False) for w in wants
- if w not in haves])
self.object_store = object_store
+ # process Commits and Tags differently
+ # Note, while haves may list commits/tags not available locally,
+ # and such SHAs would get filtered out by _split_commits_and_tags,
+ # wants shall list only known SHAs, and otherwise
+ # _split_commits_and_tags fails with KeyError
+ have_commits, have_tags = \
+ _split_commits_and_tags(object_store, haves, True)
+ want_commits, want_tags = \
+ _split_commits_and_tags(object_store, wants, False)
+ # all_ancestors is a set of commits that shall not be sent
+ # (complete repository up to 'haves')
+ all_ancestors = object_store._collect_ancestors(have_commits)[0]
+ # all_missing - complete set of commits between haves and wants
+ # common - commits from all_ancestors we hit into while
+ # traversing parent hierarchy of wants
+ missing_commits, common_commits = \
+ object_store._collect_ancestors(want_commits, all_ancestors)
+ self.sha_done = set()
+ # Now, fill sha_done with commits and revisions of
+ # files and directories known to be both locally
+ # and on target. Thus these commits and files
+ # won't get selected for fetch
+ for h in common_commits:
+ self.sha_done.add(h)
+ cmt = object_store[h]
+ _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
+ # record tags we have as visited, too
+ for t in have_tags:
+ self.sha_done.add(t)
+
+ missing_tags = want_tags.difference(have_tags)
+ # in fact, what we 'want' is commits and tags
+ # we've found missing
+ wants = missing_commits.union(missing_tags)
+
+ self.objects_to_send = set([(w, None, False) for w in wants])
+
if progress is None:
self.progress = lambda x: None
else:
@@ -822,18 +924,6 @@ def add_todo(self, entries):
self.objects_to_send.update([e for e in entries
if not e[0] in self.sha_done])
-
- def parse_tree(self, tree):
- self.add_todo([(sha, name, not stat.S_ISDIR(mode))
- for name, mode, sha in tree.iteritems()
- if not S_ISGITLINK(mode)])
-
- def parse_commit(self, commit):
- self.add_todo([(commit.tree, "", False)])
- self.add_todo([(p, None, False) for p in commit.parents])
-
- def parse_tag(self, tag):
- self.add_todo([(tag.object[1], None, False)])
def next(self):
while True:
@@ -845,11 +935,13 @@ if not leaf:
o = self.object_store[sha]
if isinstance(o, Commit):
- self.parse_commit(o)
+ self.add_todo([(o.tree, "", False)])
elif isinstance(o, Tree):
- self.parse_tree(o)
+ self.add_todo([(s, n, not stat.S_ISDIR(m))
+ for n, m, s in o.iteritems()
+ if not S_ISGITLINK(m)])
elif isinstance(o, Tag):
- self.parse_tag(o)
+ self.add_todo([(o.object[1], None, False)])
if sha in self._tagged:
self.add_todo([(self._tagged[sha], None, True)])
self.sha_done.add(sha)
|
|
|
@@ -1,0 +1,196 @@ + # test_missing_obj_finder.py -- tests for MissingObjectFinder
+# Copyright (C) 2012 syntevo GmbH
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+from dulwich.errors import (
+ MissingCommitError,
+ )
+from dulwich.object_store import (
+ MemoryObjectStore,
+ )
+from dulwich.objects import (
+ Commit,
+ Blob,
+ )
+from dulwich.tests import TestCase
+from utils import (
+ F,
+ make_object,
+ build_commit_graph,
+ )
+
+class MissingObjectFinderTest(TestCase):
+
+ def setUp(self):
+ super(MissingObjectFinderTest, self).setUp()
+ self.store = MemoryObjectStore()
+ self.commits = []
+
+ def __getitem__(self, n):
+ # rename for brevity
+ return self.commits[n-1]
+
+ def cmt(self, n):
+ return self[n]
+
+ def assertMissingMatch(self, haves, wants, expected):
+ for sha,path in self.store.find_missing_objects(haves, wants):
+ self.assertTrue(sha in expected, "FAILURE: (%s,%s) erroneously reported as missing" % (sha,path))
+ expected.remove(sha)
+
+ self.assertFalse(len(expected) > 0, "FAILURE: some objects are not reported as missing: %s" % (expected))
+
+class MOFLinearRepoTest(MissingObjectFinderTest):
+ def setUp(self):
+ super(MOFLinearRepoTest, self).setUp()
+ f1_1 = make_object(Blob, data='f1') # present in 1, removed in 3
+ f2_1 = make_object(Blob, data='f2') # present in all revisions, changed in 2 and 3
+ f2_2 = make_object(Blob, data='f2-changed')
+ f2_3 = make_object(Blob, data='f2-changed-again')
+ f3_2 = make_object(Blob, data='f3') # added in 2, left unmodified in 3
+
+ commit_spec = [[1], [2,1], [3,2]]
+ trees = {1: [('f1', f1_1), ('f2',f2_1)],
+ 2: [('f1',f1_1), ('f2', f2_2), ('f3', f3_2)],
+ 3: [('f2', f2_3), ('f3', f3_2)] }
+ # commit 1: f1 and f2
+ # commit 2: f3 added, f2 changed. Missing shall report commit id and a tree referenced by commit
+ # commit 3: f1 removed, f2 changed. Commit sha and root tree sha shall be reported as modified
+ self.commits = build_commit_graph(self.store, commit_spec, trees)
+ self.missing_1_2 = [self.cmt(2).id, self.cmt(2).tree, f2_2.id, f3_2.id]
+ self.missing_2_3 = [self.cmt(3).id, self.cmt(3).tree, f2_3.id]
+ self.missing_1_3 = [
+ self.cmt(2).id, self.cmt(3).id,
+ self.cmt(2).tree, self.cmt(3).tree,
+ f2_2.id, f3_2.id, f2_3.id]
+
+
+ def test_1_to_2(self):
+ self.assertMissingMatch([self.cmt(1).id], [self.cmt(2).id], self.missing_1_2)
+
+ def test_2_to_3(self):
+ self.assertMissingMatch([self.cmt(2).id], [self.cmt(3).id], self.missing_2_3)
+
+ def test_1_to_3(self):
+ self.assertMissingMatch([self.cmt(1).id], [self.cmt(3).id], self.missing_1_3)
+
+ def test_bogus_haves_failure(self):
+ """Ensure non-existent SHA in haves are not tolerated"""
+ bogus_sha = self.cmt(2).id[::-1]
+ haves = [self.cmt(1).id, bogus_sha]
+ wants = [self.cmt(3).id]
+ self.assertRaises(KeyError, self.store.find_missing_objects, self.store, haves, wants)
+
+ def test_bogus_wants_failure(self):
+ """Ensure non-existent SHA in wants are not tolerated"""
+ bogus_sha = self.cmt(2).id[::-1]
+ haves = [self.cmt(1).id]
+ wants = [self.cmt(3).id, bogus_sha]
+ self.assertRaises(KeyError, self.store.find_missing_objects, self.store, haves, wants)
+
+ def test_no_changes(self):
+ self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
+
+class MOFMergeForkRepoTest(MissingObjectFinderTest):
+ """ 1 --- 2 --- 4 --- 6 --- 7
+ \ /
+ 3 ---
+ \
+ 5
+ """
+
+ def setUp(self):
+ super(MOFMergeForkRepoTest, self).setUp()
+ f1_1 = make_object(Blob, data='f1')
+ f1_2 = make_object(Blob, data='f1-2')
+ f1_4 = make_object(Blob, data='f1-4')
+ f1_7 = make_object(Blob, data='f1-2') # same data as in rev 2
+ f2_1 = make_object(Blob, data='f2')
+ f2_3 = make_object(Blob, data='f2-3')
+ f3_3 = make_object(Blob, data='f3')
+ f3_5 = make_object(Blob, data='f3-5')
+ commit_spec = [[1], [2,1], [3,2], [4,2], [5,3], [6,3,4], [7,6]]
+ trees = {1: [('f1', f1_1), ('f2',f2_1)],
+ 2: [('f1',f1_2), ('f2', f2_1)], # f1 changed
+ 3: [('f1',f1_2), ('f2', f2_3), ('f3', f3_3)], # f3 added, f2 changed
+ 4: [('f1',f1_4), ('f2',f2_1)], # f1 changed
+ 5: [('f1',f1_2), ('f3', f3_5)], # f2 removed, f3 changed
+ 6: [('f1',f1_4), ('f2',f2_3), ('f3', f3_3)], # merged 3 and 4
+ 7: [('f1',f1_7), ('f2',f2_3)]} # f1 changed to match rev2. f3 removed
+ self.commits = build_commit_graph(self.store, commit_spec, trees)
+
+ self.f1_2_id = f1_2.id
+ self.f1_4_id = f1_4.id
+ self.f1_7_id = f1_7.id
+ self.f2_3_id = f2_3.id
+ self.f3_3_id = f3_3.id
+
+ self.assertEquals(f1_2.id, f1_7.id, "[sanity]")
+
+ def test_have6_want7(self):
+ """
+ have 6, want 7. Ideally, shall not report f1_7 as it's the same as f1_2,
+ however, to do so, MissingObjectFinder shall not record trees of common commits
+ only, but also all parent trees and tree items, which is an overkill
+ (i.e. in sha_done it records f1_4 as known, and doesn't record f1_2 was known
+ prior to that, hence can't detect f1_7 is in fact f1_2 and shall not be reported)
+ """
+ self.assertMissingMatch([self.cmt(6).id], [self.cmt(7).id], [self.cmt(7).id, self.cmt(7).tree, self.f1_7_id])
+
+ def test_have4_want7(self):
+ """
+ have 4, want 7. Shall not include rev5 as it is not in the tree between 4 and 7
+ (well, it is, but its SHA's are irrelevant for 4..7 commit hierarchy)
+ """
+ self.assertMissingMatch([self.cmt(4).id], [self.cmt(7).id], [
+ self.cmt(7).id, self.cmt(6).id, self.cmt(3).id,
+ self.cmt(7).tree, self.cmt(6).tree, self.cmt(3).tree,
+ self.f2_3_id, self.f3_3_id])
+
+ def test_have1_want6(self):
+ """
+ have 1, want 6. Shall not include rev5
+ """
+ self.assertMissingMatch([self.cmt(1).id], [self.cmt(6).id], [
+ self.cmt(6).id, self.cmt(4).id, self.cmt(3).id, self.cmt(2).id,
+ self.cmt(6).tree, self.cmt(4).tree, self.cmt(3).tree, self.cmt(2).tree,
+ self.f1_2_id, self.f1_4_id, self.f2_3_id, self.f3_3_id])
+
+ def test_have3_want6(self):
+ """
+ have 3, want 7. Shall not report rev2 and its tree, because
+ haves(3) means has parents, i.e. rev2, too
+ BUT shall report any changes descending rev2 (excluding rev3)
+ Shall NOT report f1_7 as it's techically == f1_2
+ """
+ self.assertMissingMatch([self.cmt(3).id], [self.cmt(7).id], [
+ self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
+ self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
+ self.f1_4_id])
+
+ def test_have5_want7(self):
+ """
+ have 5, want 7. Common parent is rev2, hence children of rev2 from
+ a descent line other than rev5 shall be reported
+ """
+ # expects f1_4 from rev6. f3_5 is known in rev5;
+ # f1_7 shall be the same as f1_2 (known, too)
+ self.assertMissingMatch([self.cmt(5).id], [self.cmt(7).id], [
+ self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
+ self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
+ self.f1_4_id])
+
|
Loading...