Kiln » Dependencies » Dulwich Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in master

Make ShaFiles created from files trust their filenames.

This adds a dummy FixedSha class that implements the read-only part
of hashlib's hash interface but does not actually compute a SHA-1
digest. This allows us to assign ids to file objects based on their
filename rather than requiring a read of the whole file; SHA-1s will
soon be checked during check().

Added a filename_to_hex helper function to objects.py; moved the
opposite hex_to_filename functionality into objects.py as well, for
symmetry and testing. As a side effect, reorganized some files in
tests/data to have the normal 2/38 filename structure (a two-character
directory name followed by a 38-character file name).

Change-Id: Ic459628aec32a92e29ea49cfd6cbe685053971ef

Changeset c866eded426f

Parent 79b760a98784

committed by Jelmer Vernooij

authored by Dave Borowitz

Changes to 11 files · Browse files at c866eded426f Showing diff from parent 79b760a98784 Diff from another changeset...

 
39
40
41
 
42
43
44
 
362
363
364
365
366
367
368
 
369
370
371
 
39
40
41
42
43
44
45
 
363
364
365
 
 
366
 
367
368
369
370
@@ -39,6 +39,7 @@
  Tree,   hex_to_sha,   sha_to_hex, + hex_to_filename,   S_ISGITLINK,   )  from dulwich.pack import ( @@ -362,10 +363,8 @@
  raise     def _get_shafile_path(self, sha): - dir = sha[:2] - file = sha[2:]   # Check from object dir - return os.path.join(self.path, dir, file) + return hex_to_filename(self.path, sha)     def _iter_loose_objects(self):   for base in os.listdir(self.path):
 
84
85
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
88
89
 
120
121
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
124
125
 
282
283
284
 
285
286
287
 
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
 
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
 
317
318
319
320
321
322
323
@@ -84,6 +84,27 @@
  return binascii.unhexlify(hex)     +def hex_to_filename(path, hex): + """Takes a hex sha and returns its filename relative to the given path.""" + dir = hex[:2] + file = hex[2:] + # Check from object dir + return os.path.join(path, dir, file) + + +def filename_to_hex(filename): + """Takes an object filename and returns its corresponding hex sha.""" + # grab the last (up to) two path components + names = filename.rsplit(os.path.sep, 2)[-2:] + errmsg = "Invalid object filename: %s" % filename + assert len(names) == 2, errmsg + base, rest = names + assert len(base) == 2 and len(rest) == 38, errmsg + hex = base + rest + hex_to_sha(hex) + return hex + +  def serializable_property(name, docstring=None):   def set(obj, value):   obj._ensure_parsed() @@ -120,6 +141,20 @@
  or identity.find(">", email_end + 1) >= 0   or not identity.endswith(">")):   raise ObjectFormatException(error_msg) + + +class FixedSha(object): + """SHA object that behaves like hashlib's but is given a fixed value.""" + + def __init__(self, hexsha): + self._hexsha = hexsha + self._sha = hex_to_sha(hexsha) + + def digest(self): + return self._sha + + def hexdigest(self): + return self._hexsha      class ShaFile(object): @@ -282,6 +317,7 @@
  try:   try:   obj = cls._parse_file_header(f) + obj._sha = FixedSha(filename_to_hex(filename))   obj._needs_parsing = True   obj._needs_serialization = True   return obj
Show Entire File dulwich/​tests/​data/​blobs/​6f/​670c0fb53f9463760b7295fbb814e965fb20c8 Stacked
renamed from dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8
(No changes)
Show Entire File dulwich/​tests/​data/​blobs/​95/​4a536f7819d40e6f637f849ee187dd10066349 Stacked
renamed from dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349
(No changes)
Show Entire File dulwich/​tests/​data/​blobs/​e6/​9de29bb2d1d6434b8b29ae775ad8c2e48c5391 Stacked
renamed from dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
(No changes)
Show Entire File dulwich/​tests/​data/​commits/​0d/​89f20333fbb1d2f3a94da77f4981373d8f4310 Stacked
renamed from dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310
(No changes)
Show Entire File dulwich/​tests/​data/​commits/​5d/​ac377bdded4c9aeb8dff595f0faeebcc8498cc Stacked
renamed from dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc
(No changes)
Show Entire File dulwich/​tests/​data/​commits/​60/​dacdc733de308bb77bb76ce0fb0f9b44c9769e Stacked
renamed from dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e
(No changes)
Show Entire File dulwich/​tests/​data/​tags/​71/​033db03a03c6a36721efcf1968dd8f8e0cf023 Stacked
renamed from dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023
(No changes)
Show Entire File dulwich/​tests/​data/​trees/​70/​c190eb48fa8bbb50ddc692a17b44cb781af7f6 Stacked
renamed from dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6
(No changes)
 
38
39
40
 
41
42
43
 
89
90
91
92
93
94
95
96
 
 
 
 
 
97
98
99
 
406
407
408
409
410
 
 
411
412
413
 
38
39
40
41
42
43
44
 
90
91
92
 
 
 
 
 
93
94
95
96
97
98
99
100
 
407
408
409
 
 
410
411
412
413
414
@@ -38,6 +38,7 @@
  Tag,   format_timezone,   hex_to_sha, + hex_to_filename,   check_hexsha,   check_identity,   parse_timezone, @@ -89,11 +90,11 @@
   class BlobReadTests(unittest.TestCase):   """Test decompression of blobs""" - - def get_sha_file(self, obj, base, sha): - return obj.from_file(os.path.join(os.path.dirname(__file__), - 'data', base, sha)) - + + def get_sha_file(self, cls, base, sha): + dir = os.path.join(os.path.dirname(__file__), 'data', base) + return cls.from_file(hex_to_filename(dir, sha)) +   def get_blob(self, sha):   """Return the blob named sha from the test data dir"""   return self.get_sha_file(Blob, 'blobs', sha) @@ -406,8 +407,8 @@
  self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])     def _do_test_parse_tree(self, parse_tree): - o = Tree.from_file(os.path.join(os.path.dirname(__file__), 'data', - 'trees', tree_sha)) + dir = os.path.join(os.path.dirname(__file__), 'data', 'trees') + o = Tree.from_file(hex_to_filename(dir, tree_sha))   o._parse_file()   self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],   list(parse_tree(o.as_raw_string())))