Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / index.py @ 959

History | View | Annotate | Download (18.7 KB)

1
# index.py -- File parser/writer for the git index file
2
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
3
#
4
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5
# General Public License as published by the Free Software Foundation; version 2.0
6
# or (at your option) any later version. You can redistribute it and/or
7
# modify it under the terms of either of these two licenses.
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
#
15
# You should have received a copy of the licenses; if not, see
16
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18
# License, Version 2.0.
19
#
20

    
21
"""Parser for the git index file format."""
22

    
23
import collections
24
import errno
25
import os
26
import stat
27
import struct
28
import sys
29

    
30
from dulwich.file import GitFile
31
from dulwich.objects import (
32
    Blob,
33
    S_IFGITLINK,
34
    S_ISGITLINK,
35
    Tree,
36
    hex_to_sha,
37
    sha_to_hex,
38
    )
39
from dulwich.pack import (
40
    SHA1Reader,
41
    SHA1Writer,
42
    )
43

    
44

    
45
# Per-path metadata stored in the index.  read_index_dict() and Index.read()
# build these from the tuples yielded by read_index() with the leading name
# element dropped.
IndexEntry = collections.namedtuple(
    'IndexEntry',
    'ctime mtime dev ino mode uid gid size sha flags')
49

    
50

    
51
def pathsplit(path):
    """Split a /-delimited path at its last slash.

    :param path: The path to split.
    :return: Tuple with directory name and basename; the directory name is
        b"" when the path contains no slash
    """
    head, sep, tail = path.rpartition(b"/")
    if not sep:
        # No separator at all: the whole path is the basename.
        return (b"", path)
    return (head, tail)
63

    
64

    
65
def pathjoin(*args):
    """Join path components with "/", skipping empty components.

    :param args: Path components to join
    :return: The joined path
    """
    return b"/".join(filter(None, args))
70

    
71

    
72
def read_cache_time(f):
    """Read a cache time.

    Consumes 8 bytes from the file and decodes them as two big-endian
    unsigned 32-bit integers.

    :param f: File-like object to read from
    :return: Tuple with seconds and nanoseconds
    """
    raw = f.read(8)
    return struct.unpack(">LL", raw)
79

    
80

    
81
def write_cache_time(f, t):
    """Write a cache time.

    The time is written as two big-endian unsigned 32-bit integers
    (seconds, nanoseconds).

    :param f: File-like object to write to
    :param t: Time to write (as int, float or tuple with secs and nsecs)
    :raise TypeError: if t is not an int, float or tuple
    """
    if isinstance(t, float):
        # Split into whole seconds and the fractional part, which is
        # scaled to nanoseconds.
        whole, fraction = divmod(t, 1.0)
        t = (int(whole), int(fraction * 1000000000))
    elif isinstance(t, int):
        t = (t, 0)
    elif not isinstance(t, tuple):
        raise TypeError(t)
    f.write(struct.pack(">LL", *t))
95

    
96

    
97
def read_cache_entry(f):
    """Read an entry from a cache file.

    :param f: File-like object to read from; must support read() and tell()
    :return: tuple with: name, ctime, mtime, device, inode, mode, uid, gid,
        size, hex sha, flags (with the 12 name-length bits cleared)
    """
    beginoffset = f.tell()
    ctime = read_cache_time(f)
    mtime = read_cache_time(f)
    (dev, ino, mode, uid, gid, size, sha, flags, ) = \
        struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
    name_len = flags & 0x0fff
    name = f.read(name_len)
    name_end = f.tell()
    if name_len == 0x0fff:
        # The 12-bit length field saturates at 0xFFF; in that case the
        # real name may be longer and runs up to the terminating NUL.
        extra = []
        while True:
            ch = f.read(1)
            if not ch or ch == b'\0':
                break
            extra.append(ch)
        name += b''.join(extra)
        name_end += len(extra)
    # Padding: entries are NUL-padded (at least one byte) so that their total
    # length is a multiple of eight.  Any NUL already consumed above is
    # accounted for by measuring from the current position.
    real_size = ((name_end - beginoffset + 8) & ~7)
    f.read((beginoffset + real_size) - f.tell())
    return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
            sha_to_hex(sha), flags & ~0x0fff)
114

    
115

    
116
def write_cache_entry(f, entry):
    """Write an index entry to a file.

    :param f: File object; must support write() and tell()
    :param entry: Entry to write, tuple with:
        (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
    """
    beginoffset = f.tell()
    (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
    write_cache_time(f, ctime)
    write_cache_time(f, mtime)
    # The low 12 bits of the flags hold the name length, saturating at 0xFFF
    # for longer names; without the cap a long name would spill into the
    # other flag bits (or overflow the 16-bit field altogether).
    flags = min(len(name), 0x0fff) | (flags & ~0x0fff)
    # dev, ino and size are truncated to the 32 bits the format provides.
    f.write(struct.pack(
        b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF, mode, uid, gid,
        size & 0xFFFFFFFF, hex_to_sha(sha), flags))
    f.write(name)
    # Pad with 1-8 NUL bytes so the entry length is a multiple of eight.
    real_size = ((f.tell() - beginoffset + 8) & ~7)
    f.write(b'\0' * ((beginoffset + real_size) - f.tell()))
132

    
133

    
134
def read_index(f):
    """Read an index file, yielding the individual entries.

    :param f: File-like object to read from
    :return: Iterator over the tuples returned by read_cache_entry()
    :raise AssertionError: if the header signature is not b'DIRC' or the
        version is not 1 or 2
    """
    header = f.read(4)
    if header != b'DIRC':
        raise AssertionError("Invalid index file header: %r" % header)
    (version, num_entries) = struct.unpack(b'>LL', f.read(4 * 2))
    # Raise explicitly rather than using an assert statement, so the check
    # still happens when Python runs with -O.
    if version not in (1, 2):
        raise AssertionError("Unsupported index file version: %r" % version)
    for _ in range(num_entries):
        yield read_cache_entry(f)
143

    
144

    
145
def read_index_dict(f):
    """Read an index file and return it as a dictionary.

    :param f: File object to read from
    :return: Dictionary mapping each entry name to an IndexEntry built from
        the remaining fields of that entry
    """
    return dict(
        (record[0], IndexEntry(*record[1:])) for record in read_index(f))
154

    
155

    
156
def write_index(f, entries):
    """Write an index file.

    Writes the b'DIRC' signature, version number 2 and the entry count,
    followed by each entry.

    :param f: File-like object to write to
    :param entries: Iterable over the entries to write
    """
    # The header needs the entry count up front, so materialise the iterable
    # once; this lets generators be passed as well as sized containers.
    entries = list(entries)
    f.write(b'DIRC')
    f.write(struct.pack(b'>LL', 2, len(entries)))
    for x in entries:
        write_cache_entry(f, x)
166

    
167

    
168
def write_index_dict(f, entries):
    """Write an index file based on the contents of a dictionary.

    Entries are written in sorted key order, each as a tuple consisting of
    the key followed by the fields of its value.

    :param f: File-like object to write to
    :param entries: Dictionary mapping names to entry field sequences
    """
    ordered = [(name,) + tuple(entries[name]) for name in sorted(entries)]
    write_index(f, ordered)
176

    
177

    
178
def cleanup_mode(mode):
    """Cleanup a mode value.

    This will return a mode that can be stored in a tree object.

    :param mode: Mode to clean up.
    :return: The bare file-type constant for symlinks, directories and
        gitlinks; otherwise a regular-file mode of 0o644 plus whatever
        execute bits were set in the input
    """
    if stat.S_ISLNK(mode):
        return stat.S_IFLNK
    if stat.S_ISDIR(mode):
        return stat.S_IFDIR
    if S_ISGITLINK(mode):
        return S_IFGITLINK
    # Everything else is treated as a regular file; only the execute bits
    # of the original permissions are carried over.
    return stat.S_IFREG | 0o644 | (mode & 0o111)
194

    
195

    
196
class Index(object):
    """A Git Index file.

    Entries are held in a dictionary mapping path (bytes) to a 10-field
    entry; the dictionary is loaded from disk on construction and written
    back with write().
    """

    def __init__(self, filename):
        """Open an index file.

        The file is read immediately if it exists.

        :param filename: Path to the index file
        """
        self._filename = filename
        self.clear()
        self.read()

    @property
    def path(self):
        """Path of the index file this object was opened with."""
        return self._filename

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._filename)

    def write(self):
        """Write current contents of index to disk."""
        out = GitFile(self._filename, 'wb')
        try:
            # Wrap the file so everything written goes through SHA1Writer.
            out = SHA1Writer(out)
            write_index_dict(out, self._byname)
        finally:
            out.close()

    def read(self):
        """Read current contents of index from disk.

        Does nothing when the index file does not exist.
        """
        if not os.path.exists(self._filename):
            return
        src = GitFile(self._filename, 'rb')
        try:
            src = SHA1Reader(src)
            for record in read_index(src):
                self[record[0]] = IndexEntry(*record[1:])
            # FIXME: Additional data?
            # Consume whatever lies between the entries and the final
            # 20 bytes, then let the reader verify the checksum.
            remaining = os.path.getsize(self._filename) - src.tell() - 20
            src.read(remaining)
            src.check_sha()
        finally:
            src.close()

    def __len__(self):
        """Number of entries in this index file."""
        return len(self._byname)

    def __getitem__(self, name):
        """Retrieve entry by relative path.

        :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size,
            sha, flags)
        """
        return self._byname[name]

    def __iter__(self):
        """Iterate over the paths in this index."""
        return iter(self._byname)

    def get_sha1(self, path):
        """Return the (git object) SHA1 for the object at a path."""
        entry = self[path]
        return entry.sha

    def get_mode(self, path):
        """Return the POSIX file mode for the object at a path."""
        entry = self[path]
        return entry.mode

    def iterblobs(self):
        """Iterate over path, sha, mode tuples for use with commit_tree."""
        for path in self:
            entry = self[path]
            yield path, entry.sha, cleanup_mode(entry.mode)

    def clear(self):
        """Remove all contents from this index."""
        self._byname = {}

    def __setitem__(self, name, x):
        """Store an entry under a path, replacing any existing one.

        :param name: Path of the entry, as bytes
        :param x: Entry with exactly 10 fields
        """
        assert isinstance(name, bytes)
        assert len(x) == 10
        # Assignment overwrites the previous entry for this path, if any.
        self._byname[name] = x

    def __delitem__(self, name):
        """Remove the entry stored under a path (bytes)."""
        assert isinstance(name, bytes)
        del self._byname[name]

    def iteritems(self):
        """Return the (path, entry) pairs held in this index."""
        return self._byname.items()

    def update(self, entries):
        """Store every (path, entry) pair of a mapping in this index."""
        for path, entry in entries.items():
            self[path] = entry

    def changes_from_tree(self, object_store, tree, want_unchanged=False):
        """Find the differences between the contents of this index and a tree.

        :param object_store: Object store to use for retrieving tree contents
        :param tree: SHA1 of the root tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with (oldpath, newpath),
            (oldmode, newmode), (oldsha, newsha)
        """
        def lookup_entry(path):
            found = self[path]
            return found.sha, found.mode

        # Delegates to the module-level changes_from_tree() function.
        differences = changes_from_tree(
            self._byname.keys(), lookup_entry, object_store, tree,
            want_unchanged=want_unchanged)
        for (paths, modes, shas) in differences:
            yield (paths, modes, shas)

    def commit(self, object_store):
        """Create a new tree from an index.

        :param object_store: Object store to save the tree in
        :return: Root tree SHA
        """
        return commit_tree(object_store, self.iterblobs())
312

    
313

    
314
def commit_tree(object_store, blobs):
    """Commit a new tree.

    Groups the blobs by directory, then builds a Tree object for every
    directory and adds each one to the object store.

    :param object_store: Object store to add trees to
    :param blobs: Iterable over blob path, sha, mode entries
    :return: SHA1 of the created tree.
    """
    # Maps a directory path to a dict of its children; a child value is
    # either a nested dict (subdirectory) or a (mode, sha) pair (blob).
    trees = {b'': {}}

    def add_tree(path):
        if path in trees:
            return trees[path]
        parent_path, child_name = pathsplit(path)
        parent = add_tree(parent_path)
        assert isinstance(child_name, bytes)
        created = {}
        parent[child_name] = created
        trees[path] = created
        return created

    for blob_path, blob_sha, blob_mode in blobs:
        dir_path, file_name = pathsplit(blob_path)
        add_tree(dir_path)[file_name] = (blob_mode, blob_sha)

    def build_tree(path):
        tree = Tree()
        for child_name, child in trees[path].items():
            if isinstance(child, dict):
                # Subdirectory: build it first so its id is known.
                child_mode = stat.S_IFDIR
                child_sha = build_tree(pathjoin(path, child_name))
            else:
                (child_mode, child_sha) = child
            tree.add(child_name, child_mode, child_sha)
        object_store.add_object(tree)
        return tree.id

    return build_tree(b'')
352

    
353

    
354
def commit_index(object_store, index):
    """Create a new tree from an index.

    :param object_store: Object store to save the tree in
    :param index: Index file
    :note: This function is deprecated, use index.commit() instead.
    :return: Root tree sha.
    """
    blobs = index.iterblobs()
    return commit_tree(object_store, blobs)
363

    
364

    
365
def changes_from_tree(names, lookup_entry, object_store, tree,
        want_unchanged=False):
    """Find the differences between the contents of a tree and
    a working copy.

    :param names: Iterable of names in the working copy
    :param lookup_entry: Function to lookup an entry in the working copy;
        returns a (sha, mode) pair or raises KeyError
    :param object_store: Object store to use for retrieving tree contents
    :param tree: SHA1 of the root tree, or None for an empty tree
    :param want_unchanged: Whether unchanged files should be reported
    :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
        (oldsha, newsha)
    """
    # Names not yet matched against a tree entry; whatever is left once the
    # tree has been walked exists only in the working copy.
    unmatched = set(names)

    if tree is not None:
        for (name, mode, sha) in object_store.iter_tree_contents(tree):
            try:
                (other_sha, other_mode) = lookup_entry(name)
            except KeyError:
                # Was removed
                yield ((name, None), (mode, None), (sha, None))
                continue
            unmatched.remove(name)
            if (want_unchanged or other_sha != sha or other_mode != mode):
                yield ((name, name), (mode, other_mode), (sha, other_sha))

    # Mention added files
    for name in unmatched:
        try:
            (other_sha, other_mode) = lookup_entry(name)
        except KeyError:
            # Listed in names but not resolvable: nothing to report.
            continue
        yield ((None, name), (None, other_mode), (None, other_sha))
400

    
401

    
402
def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
    """Create a new index entry from a stat value.

    :param stat_val: POSIX stat_result instance
    :param hex_sha: Hex sha of the object
    :param flags: Index flags
    :param mode: Mode to record; when None it is derived from
        stat_val.st_mode via cleanup_mode()
    :return: Tuple with (ctime, mtime, dev, ino, mode, uid, gid, size,
        hex sha, flags)
    """
    if mode is None:
        mode = cleanup_mode(stat_val.st_mode)
    times = (stat_val.st_ctime, stat_val.st_mtime)
    identity = (stat_val.st_dev, stat_val.st_ino, mode)
    ownership = (stat_val.st_uid, stat_val.st_gid)
    return times + identity + ownership + (stat_val.st_size, hex_sha, flags)
414

    
415

    
416
def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
    """Build a file or symlink on disk based on a Git object.

    :param blob: The git object; its raw string becomes the file contents
        or the symlink target
    :param mode: File mode
    :param target_path: Path to write to
    :param honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
    :return: stat object for the file
    """
    try:
        oldstat = os.lstat(target_path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # Nothing exists at the target yet.
        oldstat = None
    contents = blob.as_raw_string()

    if stat.S_ISLNK(mode):
        # FIXME: This will fail on Windows. What should we do instead?
        if oldstat:
            os.unlink(target_path)
        os.symlink(contents, target_path)
        return os.lstat(target_path)

    # Leave the file untouched when it already holds exactly these bytes.
    if oldstat is not None and oldstat.st_size == len(contents):
        with open(target_path, 'rb') as f:
            if f.read() == contents:
                return oldstat

    with open(target_path, 'wb') as f:
        # Write out file
        f.write(contents)

    if honor_filemode:
        os.chmod(target_path, mode)

    return os.lstat(target_path)
453

    
454

    
455
# Path elements that the validators below reject.
INVALID_DOTNAMES = (b".git", b".", b"..", b"")


def validate_path_element_default(element):
    """Check a single path element against INVALID_DOTNAMES.

    The comparison is done on the lower-cased element.

    :param element: Path element as bytes
    :return: True if the element is acceptable, False otherwise
    """
    lowered = element.lower()
    return lowered not in INVALID_DOTNAMES
460

    
461

    
462
def validate_path_element_ntfs(element):
    """Check a single path element, with extra NTFS-oriented rules.

    Trailing dots and spaces are stripped and the result lower-cased before
    it is compared against INVALID_DOTNAMES and the name b"git~1".

    :param element: Path element as bytes
    :return: True if the element is acceptable, False otherwise
    """
    normalized = element.rstrip(b". ").lower()
    return not (normalized in INVALID_DOTNAMES or normalized == b"git~1")
469

    
470

    
471
def validate_path(path, element_validator=validate_path_element_default):
    """Default path validator that just checks for .git/.

    :param path: /-delimited path as bytes
    :param element_validator: Function called with each path element;
        a false result rejects the whole path
    :return: True if every element of the path is accepted
    """
    return all(element_validator(part) for part in path.split(b"/"))
479

    
480

    
481
def build_index_from_tree(root_path, index_path, object_store, tree_id,
                          honor_filemode=True,
                          validate_path_element=validate_path_element_default):
    """Generate and materialize index from a tree

    :param tree_id: Tree to materialize
    :param root_path: Target dir for materialized index files
    :param index_path: Target path for generated index
    :param object_store: Non-empty object store holding tree contents
    :param honor_filemode: An optional flag to honor core.filemode setting in
        config file, default is core.filemode=True, change executable bit
    :param validate_path_element: Function to validate path elements to check out;
        default just refuses .git and .. directories.

    :note:: existing index is wiped and contents are not merged
        in a working dir. Suitable only for fresh clones.
    """

    index = Index(index_path)
    if not isinstance(root_path, bytes):
        root_path = root_path.encode(sys.getfilesystemencoding())

    for entry in object_store.iter_tree_contents(tree_id):
        # Entries whose path fails validation are silently skipped.
        if not validate_path(entry.path, validate_path_element):
            continue
        full_path = _tree_to_fs_path(root_path, entry.path)

        parent_dir = os.path.dirname(full_path)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        # FIXME: Merge new index into working tree
        is_gitlink = S_ISGITLINK(entry.mode)
        if is_gitlink:
            # Gitlink entries are materialized as a directory.
            os.mkdir(full_path)
            st = os.lstat(full_path)
        else:
            obj = object_store[entry.sha]
            st = build_file_from_blob(
                obj, entry.mode, full_path, honor_filemode=honor_filemode)
        # Add file to index
        if not honor_filemode or is_gitlink:
            # Rebuild the stat result with the tree entry's mode in place of
            # the on-disk one.  We can not use tuple slicing to build the
            # new tuple, because on windows that will convert the times to
            # longs, which causes errors further along.
            st_tuple = (entry.mode, st.st_ino, st.st_dev, st.st_nlink,
                        st.st_uid, st.st_gid, st.st_size, st.st_atime,
                        st.st_mtime, st.st_ctime)
            st = st.__class__(st_tuple)
        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)

    index.write()
531

    
532

    
533
def blob_from_path_and_stat(fs_path, st):
    """Create a blob from a path and a stat object.

    For a symlink the blob data is the link target; for anything else it is
    the file's contents.

    :param fs_path: Full file system path to file
    :param st: A stat object
    :return: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if stat.S_ISLNK(st.st_mode):
        blob.data = os.readlink(fs_path)
    else:
        with open(fs_path, 'rb') as f:
            blob.data = f.read()
    return blob
548

    
549

    
550
def get_unstaged_changes(index, root_path):
    """Walk through an index and check for differences against working tree.

    :param index: index to check
    :param root_path: path in which to find files
    :return: iterator over paths with unstaged changes
    """
    # For each entry in the index check the sha1 & ensure not staged
    if not isinstance(root_path, bytes):
        root_path = root_path.encode(sys.getfilesystemencoding())

    for tree_path, entry in index.iteritems():
        full_path = _tree_to_fs_path(root_path, tree_path)
        try:
            blob = blob_from_path_and_stat(full_path, os.lstat(full_path))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            # The file was removed, so we assume that counts as
            # different from whatever file used to exist.
            yield tree_path
            continue
        if blob.id != entry.sha:
            yield tree_path
574

    
575

    
576
os_sep_bytes = os.sep.encode('ascii')
577

    
578

    
579
def _tree_to_fs_path(root_path, tree_path):
580
    """Convert a git tree path to a file system path.
581

582
    :param root_path: Root filesystem path
583
    :param tree_path: Git tree path as bytes
584

585
    :return: File system path.
586
    """
587
    assert isinstance(tree_path, bytes)
588
    if os_sep_bytes != b'/':
589
        sep_corrected_path = tree_path.replace(b'/', os_sep_bytes)
590
    else:
591
        sep_corrected_path = tree_path
592
    return os.path.join(root_path, sep_corrected_path)
593

    
594

    
595
def _fs_to_tree_path(fs_path, fs_encoding=None):
    """Convert a file system path to a git tree path.

    :param fs_path: File system path.
    :param fs_encoding: File system encoding; defaults to
        sys.getfilesystemencoding() when None

    :return:  Git tree path as bytes
    """
    if fs_encoding is None:
        fs_encoding = sys.getfilesystemencoding()
    if isinstance(fs_path, bytes):
        raw_path = fs_path
    else:
        raw_path = fs_path.encode(fs_encoding)
    if os_sep_bytes == b'/':
        # Separators already match; no translation needed.
        return raw_path
    return raw_path.replace(os_sep_bytes, b'/')