Kiln » Dependencies » Dulwich Read More
Clone URL:  
Pushed to one repository · View In Graph Contained in master-1, master-0, and 0.9.4

Rewrite add_thin_pack to use the fast PackIndexer.

Change-Id: I1cf586cbf02e207334fafd3d08c7c61231463826

Changeset 89fbab97356f

Parent d7496d7fe34b

committed by Jelmer Vernooij

authored by Dave Borowitz

Changes to 4 files · Browse files at 89fbab97356f Showing diff from parent d7496d7fe34b Diff from another changeset...

Change 1 of 1 Show Entire File NEWS Stacked
 
50
51
52
 
 
 
53
54
55
 
50
51
52
53
54
55
56
57
58
@@ -50,6 +50,9 @@
    * Extract a compute_file_sha function. (Dave Borowitz)   + * Remove move_in_thin_pack as a separate method; add_thin_pack now completes + the thin pack and moves it in in one step. (Dave Borowitz) +   TEST CHANGES     * If setuptools is installed, "python setup.py test" will now run the testsuite.
 
20
21
22
 
23
24
25
 
27
28
29
 
 
 
 
 
30
31
32
 
51
52
53
 
54
55
56
 
57
 
58
 
 
 
59
60
61
 
387
388
389
390
 
391
392
393
394
395
 
396
397
398
399
400
401
402
403
404
405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
407
408
409
410
 
 
411
412
413
414
415
416
417
418
 
 
 
 
 
419
420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
422
423
 
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
 
20
21
22
23
24
25
26
 
28
29
30
31
32
33
34
35
36
37
38
 
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
 
399
400
401
 
402
403
404
405
406
407
408
409
 
 
 
 
 
 
 
 
 
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
 
 
 
 
441
442
443
 
 
 
 
 
 
 
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
 
497
498
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
501
502
@@ -20,6 +20,7 @@
 """Git object store interfaces and implementation."""     +from cStringIO import StringIO  import errno  import itertools  import os @@ -27,6 +28,11 @@
 import tempfile  import urllib2   +from dulwich._compat import ( + make_sha, + SEEK_END, + SEEK_CUR, + )  from dulwich.diff_tree import (   tree_changes,   walk_trees, @@ -51,11 +57,17 @@
  Pack,   PackData,   ThinPackData, + obj_sha,   iter_sha1,   load_pack_index,   write_pack, + write_pack_header,   write_pack_index_v2, + write_pack_object,   write_pack_objects, + compute_file_sha, + PackIndexer, + PackStreamCopier,   )    INFODIR = 'info' @@ -387,37 +399,81 @@
  def _remove_loose_object(self, sha):   os.remove(self._get_shafile_path(sha))   - def move_in_thin_pack(self, path): + def _complete_thin_pack(self, f, path, copier, indexer):   """Move a specific file containing a pack into the pack directory.     :note: The file should be on the same file system as the   packs directory.   + :param f: Open file object for the pack.   :param path: Path to the pack file. - """ - data = ThinPackData(self.get_raw, path) - - # Write index for the thin pack (do we really need this?) - temppath = os.path.join(self.pack_dir, - sha_to_hex(urllib2.randombytes(20))+".tempidx") - data.create_index_v2(temppath) - p = Pack.from_objects(data, load_pack_index(temppath)) - + :param copier: A PackStreamCopier to use for writing pack data. + :param indexer: A PackIndexer for indexing the pack. + """ + entries = list(indexer) + + # Update the header with the new number of objects. + f.seek(0) + write_pack_header(f, len(entries) + len(indexer.ext_refs())) + + # Rescan the rest of the pack, computing the SHA with the new header. + new_sha = compute_file_sha(f, end_ofs=-20) + + # Complete the pack. + for ext_sha in indexer.ext_refs(): + type_num, data = self.get_raw(ext_sha) + offset = f.tell() + crc32 = write_pack_object(f, type_num, data, sha=new_sha) + entries.append((ext_sha, offset, crc32)) + pack_sha = new_sha.digest() + f.write(pack_sha) + f.close() + + # Move the pack in. + entries.sort() + pack_base_name = os.path.join( + self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries)) + os.rename(path, pack_base_name + '.pack') + + # Write the index. + index_file = GitFile(pack_base_name + '.idx', 'wb')   try: - # Write a full pack version - temppath = os.path.join(self.pack_dir, - sha_to_hex(urllib2.randombytes(20))+".temppack") - write_pack(temppath, p.pack_tuples()) + write_pack_index_v2(index_file, entries, pack_sha) + index_file.close()   finally: - p.close() - - pack_sha = load_pack_index(temppath+".idx").objects_sha1() - newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha) - os.rename(temppath+".pack", newbasename+".pack") - os.rename(temppath+".idx", newbasename+".idx") - final_pack = Pack(newbasename) + index_file.abort() + + # Add the pack to the store and return it. + final_pack = Pack(pack_base_name) + final_pack.check_length_and_checksum()   self._add_known_pack(final_pack)   return final_pack + + def add_thin_pack(self, read_all, read_some): + """Add a new thin pack to this object store. + + Thin packs are packs that contain deltas with parents that exist outside + the pack. They should never be placed in the object store directly, and + always indexed and completed as they are copied. + + :param read_all: Read function that blocks until the number of requested + bytes are read. + :param read_some: Read function that returns at least one byte, but may + not return the number of bytes requested. + :return: A Pack object pointing at the now-completed thin pack in the + objects/pack directory. + """ + fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_') + f = os.fdopen(fd, 'w+b') + + try: + indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) + copier = PackStreamCopier(read_all, read_some, f, + delta_iter=indexer) + copier.verify() + return self._complete_thin_pack(f, path, copier, indexer) + finally: + f.close()     def move_in_pack(self, path):   """Move a specific file containing a pack into the pack directory. @@ -441,24 +497,6 @@
  final_pack = Pack(basename)   self._add_known_pack(final_pack)   return final_pack - - def add_thin_pack(self): - """Add a new thin pack to this object store. - - Thin packs are packs that contain deltas with parents that exist - in a different pack. - """ - fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") - f = os.fdopen(fd, 'wb') - def commit(): - os.fsync(fd) - f.close() - if os.path.getsize(path) > 0: - return self.move_in_thin_pack(path) - else: - os.remove(path) - return None - return f, commit     def add_pack(self):   """Add a new pack to this object store.
Change 1 of 2 Show Entire File dulwich/​server.py Stacked
 
44
45
46
47
48
49
50
 
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
 
 
603
604
605
 
44
45
46
 
47
48
49
 
587
588
589
 
590
591
592
593
594
595
 
 
 
 
 
596
597
598
599
600
@@ -44,7 +44,6 @@
  hex_to_sha,   )  from dulwich.pack import ( - PackStreamCopier,   write_pack_objects,   )  from dulwich.protocol import ( @@ -588,18 +587,14 @@
  return ("report-status", "delete-refs", "side-band-64k")     def _apply_pack(self, refs): - f, commit = self.repo.object_store.add_thin_pack()   all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,   AssertionError, socket.error, zlib.error,   ObjectFormatException)   status = []   # TODO: more informative error messages than just the exception string   try: - PackStreamCopier(self.proto.read, self.proto.recv, f).verify() - p = commit() - if not p: - raise IOError('Failed to write pack') - p.check() + p = self.repo.object_store.add_thin_pack(self.proto.read, + self.proto.recv)   status.append(('unpack', 'ok'))   except all_exceptions, e:   status.append(('unpack', str(e).replace('\n', '')))
 
19
20
21
 
22
23
24
 
30
31
32
 
33
34
35
 
42
43
44
 
45
46
47
 
49
50
51
 
52
53
54
 
236
237
238
239
240
241
242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
244
245
 
19
20
21
22
23
24
25
 
31
32
33
34
35
36
37
 
44
45
46
47
48
49
50
 
52
53
54
55
56
57
58
 
240
241
242
 
 
 
 
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
@@ -19,6 +19,7 @@
 """Tests for the object store interface."""     +from cStringIO import StringIO  import os  import shutil  import tempfile @@ -30,6 +31,7 @@
  NotTreeError,   )  from dulwich.objects import ( + sha_to_hex,   object_class,   Blob,   Tag, @@ -42,6 +44,7 @@
  tree_lookup_path,   )  from dulwich.pack import ( + REF_DELTA,   write_pack_objects,   )  from dulwich.tests import ( @@ -49,6 +52,7 @@
  )  from dulwich.tests.utils import (   make_object, + build_pack,   )     @@ -236,10 +240,22 @@
    def test_add_thin_pack(self):   o = DiskObjectStore(self.store_dir) - f, commit = o.add_thin_pack() - b = make_object(Blob, data="more yummy data") - write_pack_objects(f, [(b, None)]) - commit() + blob = make_object(Blob, data='yummy data') + o.add_object(blob) + + f = StringIO() + entries = build_pack(f, [ + (REF_DELTA, (blob.id, 'more yummy data')), + ], store=o) + pack = o.add_thin_pack(f.read, None) + + packed_blob_sha = sha_to_hex(entries[0][3]) + pack.check_length_and_checksum() + self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack)) + self.assertTrue(o.contains_packed(packed_blob_sha)) + self.assertTrue(o.contains_packed(blob.id)) + self.assertEqual((Blob.type_num, 'more yummy data'), + o.get_raw(packed_blob_sha))      class TreeLookupPathTests(TestCase):