From 02add3c977f2efcc15ddc6b785d52ade934f5b6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jarno=20Sepp=C3=A4nen?= Date: Wed, 7 May 2014 22:59:54 +0300 Subject: [PATCH] Store metadata in an sqlite database --- gpgfs.py | 505 +++++++++++++++++++++++----------------------------- gpgstore.py | 3 + 2 files changed, 225 insertions(+), 283 deletions(-) diff --git a/gpgfs.py b/gpgfs.py index ec2a262..3a9ded6 100755 --- a/gpgfs.py +++ b/gpgfs.py @@ -6,12 +6,10 @@ import stat import os import sys import logging -import struct import time -from cStringIO import StringIO import gpgstore - -magic = 'GPGFS1\n' +import sqlite3 +from contextlib import contextmanager log = logging.getLogger('gpgfs') @@ -23,82 +21,6 @@ class Entry: for k,v in kwargs.iteritems(): setattr(self, k, v) -# entry types: -ENT_FILE = 0 -ENT_DIR = 1 - -def read_index(store, path): - data = store.get(path) - buf = StringIO(data) - if buf.read(len(magic)) != magic: - raise IOError, 'index parse error: %s' % path - read_atom(buf) - root = Entry(**read_dict(buf)) - return root - -def write_index(store, path, root): - buf = StringIO() - buf.write(magic) - header = '' - write_atom(buf, header) - write_dict(buf, root) - store.put(buf.getvalue(), path=path) - -def write_dict(fd, dct): - # breadth-first - children = [] - buf = StringIO() - if not isinstance(dct, dict): - dct = dct.__dict__ - for key in dct: - write_atom(buf, key.encode('utf8')) - val = dct[key] - if isinstance(val, dict): - buf.write('D') - children.append(val) - elif isinstance(val, Entry): - buf.write('E') - children.append(val) - elif isinstance(val, (int, long)): - buf.write('I') - buf.write(struct.pack(' %s', path, encpath) - dir.st_mtime = int(time.time()) - try: - self._write_index() - except: - try: self.store.delete(encpath) - except: pass - del dir.children[path] - dir.st_mtime = prev_mtime - raise - self.fd += 1 - return self.fd + mode &= 0777 + mode |= stat.S_IFREG + with transaction(self.db): + parent, name = self._find(path, parent=True) + sql = """ + INSERT INTO entry (name, parent_id, mode, nlink, ctime) + VALUES (?,?,?,?,?) + """ + now = time.time() + try: + self.db.execute(sql, [name, parent.id, mode, 1, now]) + except sqlite3.IntegrityError: + raise FuseOSError(errno.EEXIST) + self._put(path, '', transaction_=False) + sql = "UPDATE entry SET mtime=? WHERE id=?" + self.db.execute(sql, [now, parent.id]) + self.fd += 1 + return self.fd def flush(self, path, fh): if not self.write_dirty: @@ -217,19 +206,7 @@ class GpgFs(LoggingMixIn, Operations): return 0 buf = ''.join(self.write_buf) self.write_buf = [buf] - ent = self._find(self.write_path) - prev_size = ent.st_size - prev_path = ent.path - ent.st_size = len(buf) - ent.path = self.store.put(buf) - try: - self._write_index() - except: - self.store.delete(ent.path) - ent.st_size = prev_size - ent.path = prev_path - raise - self.store.delete(prev_path) + self._put(self.write_path, buf) self.write_dirty = False log.debug('flushed %d bytes to %s', len(buf), self.write_path) return 0 @@ -239,19 +216,11 @@ class GpgFs(LoggingMixIn, Operations): return 0 def getattr(self, path, fh = None): - ent = self._find(path) - if ent.type == ENT_DIR: - return dict(st_mode = stat.S_IFDIR | ent.st_mode, - st_size = len(ent.children), - st_ctime = ent.st_ctime, st_mtime = ent.st_mtime, - st_atime = 0, st_nlink = 3) - # ensure st_size is up-to-date - self.flush(path, 0) - encpath = self.encroot + '/' + ent.path - s = os.stat(encpath) - return dict(st_mode = s.st_mode, st_size = ent.st_size, - st_atime = s.st_atime, st_mtime = s.st_mtime, - st_ctime = s.st_ctime, st_nlink = s.st_nlink) + with transaction(self.db): + ent = self._find(path) + return dict(st_mode = ent.mode, st_size = ent.size, + st_ctime = ent.ctime, st_mtime = ent.mtime, + st_atime = 0, st_nlink = ent.nlink) def getxattr(self, path, name, position = 0): raise FuseOSError(errno.ENODATA) # ENOATTR @@ -260,21 +229,23 @@ class GpgFs(LoggingMixIn, Operations): return [] def mkdir(self, path, mode): - dir, path = self._find(path, parent=True) - if path in dir.children: - raise FuseOSError(errno.EEXIST) - prev_mtime = dir.st_mtime - dir.children[path] = Entry(type=ENT_DIR, children={}, - st_mode=(mode & 0777), - st_mtime=int(time.time()), - st_ctime=int(time.time())) - dir.st_mtime = int(time.time()) - try: - self._write_index() - except: - del dir.children[path] - dir.st_mtime = prev_mtime - raise + mode &= 0777 + mode |= stat.S_IFDIR + with transaction(self.db): + parent, name = self._find(path, parent=True) + sql = """ + INSERT INTO entry + (name, type, parent_id, mode, nlink, size, mtime, ctime) + VALUES (?,?,?,?,?,?,?,?) + """ + now = time.time() + try: + self.db.execute(sql, [name, parent.id, + mode, 2, 0, now, now]) + except sqlite3.IntegrityError: + raise FuseOSError(errno.EEXIST) + sql = "UPDATE entry SET mtime=? WHERE id=?" + self.db.execute(sql, [now, parent.id]) def open(self, path, flags): return 0 @@ -282,13 +253,18 @@ class GpgFs(LoggingMixIn, Operations): def read(self, path, size, offset, fh): self.flush(path, 0) ent = self._find(path) - assert ent.type == ENT_FILE - data = self.store.get(ent.path) + assert ent.mode & stat.S_IFREG + try: + data = self.store.get(ent.encpath) + except IOError: + raise FuseOSError(errno.ENOENT) return data[offset:offset + size] def readdir(self, path, fh): - dir = self._find(path) - return ['.', '..'] + list(dir.children) + dirent = self._find(path) + sql = "SELECT name FROM entry WHERE parent_id=?" + self.db.execute(sql, [dirent.id]) + return ['.', '..'] + [name for name, in self.db] def readlink(self, path): raise FuseOSError(errno.ENOSYS) @@ -301,53 +277,43 @@ class GpgFs(LoggingMixIn, Operations): self._clear_write_cache() if new.startswith(old): raise FuseOSError(errno.EINVAL) - old_dir, old_name = self._find(old, parent=True) - if old_name not in old_dir.children: - raise FuseOSError(errno.ENOENT) - new_dir, new_name = self._find(new, parent=True) - prev_ent = new_dir.children.get(new_name) - if prev_ent: - if prev_ent.type == ENT_DIR: - if old_dir[old_name].type != ENT_DIR: - raise FuseOSError(errno.EISDIR) - if prev_ent.children: - raise FuseOSError(errno.ENOTEMPTY) - elif old_dir[old_name].type == ENT_DIR: - raise FuseOSError(errno.ENOTDIR) - prev_old_mtime = old_dir.st_mtime - prev_new_mtime = new_dir.st_mtime - new_dir.children[new_name] = old_dir.children.pop(old_name) - old_dir.st_mtime = new_dir.st_mtime = int(time.time()) - try: - self._write_index() - except: - old_dir.children[old_name] = new_dir.children.pop(new_name) - if prev_ent: - new_dir.children[new_name] = prev_ent - old_dir.st_mtime = prev_old_mtime - new_dir.st_mtime = prev_new_mtime - raise - if prev_ent and prev_ent.type == ENT_FILE: - os.remove(self.encroot + '/' + prev_ent.path) + with transaction(self.db): + old_ent = self._find(old) + new_ent = self._find(new, default=None) + old_parent, old_name = self._find(old, parent=True) + new_parent, new_name = self._find(new, parent=True) + if new_ent != None: + if new_ent.mode & stat.S_IFDIR: + if not old_ent.mode & stat.S_IFDIR: + raise FuseOSError(errno.EISDIR) + sql = "SELECT COUNT(*) FROM entry WHERE parent_id=?" + self.db.execute(sql, [new_ent.id]) + if self.db.fetchone()[0]: + raise FuseOSError(errno.ENOTEMPTY) + elif old_ent.mode & stat.S_IFDIR: + raise FuseOSError(errno.ENOTDIR) + sql = "DELETE FROM entry WHERE id=?" + self.db.execute(sql, [new_ent.id]) + sql = "UPDATE entry SET parent_id=? WHERE id=?" + self.db.execute(sql, [new_parent.id, old_ent.id]) + sql = "UPDATE entry SET mtime=? WHERE id IN (?,?)" + self.db.execute(sql, [time.time(), old_parent.id, new_parent.id]) + if new_ent != None and new_ent.mode & stat.S_IFREG: + self.store.delete(new_ent.encpath) def rmdir(self, path): - parent, path = self._find(path, parent=True) - if path not in parent.children: - raise FuseOSError(errno.ENOENT) - ent = parent.children[path] - if ent.type != ENT_DIR: - raise FuseOSError(errno.ENOTDIR) - if ent.children: - raise FuseOSError(errno.ENOTEMPTY) - prev_mtime = parent.st_mtime - del parent.children[path] - parent.st_mtime = int(time.time()) - try: - self._write_index() - except: - parent.children[path] = ent - parent.st_mtime = prev_mtime - raise + with transaction(self.db): + ent = self._find(path) + if not ent.mode & stat.S_IFDIR: + raise FuseOSError(errno.ENOTDIR) + sql = "SELECT COUNT(*) FROM entry WHERE parent_id=?" + self.db.execute(sql, [ent.id]) + if self.db.fetchone()[0]: + raise FuseOSError(errno.ENOTEMPTY) + sql = "DELETE FROM entry WHERE id=?" + self.db.execute(sql, [ent.id]) + sql = "UPDATE entry SET mtime=? WHERE id=?" + self.db.execute(sql, [time.time(), ent.parent_id]) def setxattr(self, path, name, value, options, position = 0): raise FuseOSError(errno.ENOSYS) @@ -361,69 +327,42 @@ class GpgFs(LoggingMixIn, Operations): def truncate(self, path, length, fh = None): self.flush(path, 0) self._clear_write_cache() - ent = self._find(path) - if length == 0: - buf = '' - else: - buf = self.store.get(ent.path) - buf = buf[:length] - prev_size = ent.st_size - prev_path = ent.path - ent.st_size = length - ent.path = self.store.put(buf) - try: - self._write_index() - except: - os.remove(ent.path) - ent.st_size = prev_size - ent.path = prev_path - raise - self.store.delete(prev_path) + with transaction(self.db): + ent = self._find(path) + if length == 0: + buf = '' + else: + buf = self.store.get(ent.encpath) + buf = buf[:length] + self._put(path, buf, transaction_=False) def unlink(self, path): - if self.write_path == path: - # no need to flush afterwards - self._clear_write_cache() - dir, name = self._find(path, parent=True) - if name not in dir.children: - raise FuseOSError(errno.ENOENT) - ent = dir.children[name] - encpath = self.encroot + '/' + ent.path - del dir.children[name] - prev_mtime = dir.st_mtime - dir.st_mtime = int(time.time()) - try: - self._write_index() - except: - dir.children[name] = ent - dir.st_mtime = prev_mtime - raise - os.remove(encpath) + with transaction(self.db): + if self.write_path == path: + # no need to flush afterwards + self._clear_write_cache() + ent = self._find(path) + sql = "DELETE FROM entry WHERE id=?" + self.db.execute(sql, [ent.id]) + sql = "UPDATE entry SET mtime=? WHERE id=?" + self.db.execute(sql, [time.time(), ent.parent_id]) + self.store.delete(ent.encpath) def utimens(self, path, times = None): - ent = self._find(path) - if ent.type == ENT_DIR: - prev_mtime = ent.st_mtime - if times is None: - ent.st_mtime = int(time.time()) - else: - ent.st_mtime = times[1] - try: - self._write_index() - except: - ent.st_mtime = prev_mtime - raise + if times is None: + mtime = time.time() else: - # flush may mess with mtime - self.flush(path, 0) - encpath = self.encroot + '/' + ent.path - os.utime(encpath, times) + mtime = times[1] + with transaction(self.db): + ent = self._find(path) + sql = "UPDATE entry SET mtime=? WHERE id=?" + self.db.execute(sql, [mtime, ent.id]) def write(self, path, data, offset, fh): - ent = self._find(path) if path != self.write_path: self.flush(self.write_path, None) - buf = self.store.get(ent.path) + ent = self._find(path) + buf = self.store.get(ent.encpath) self.write_buf = [buf] self.write_len = len(buf) self.write_path = path @@ -445,5 +384,5 @@ if __name__ == '__main__': logpath = os.path.join(os.path.dirname(__file__), 'gpgfs.log') log.addHandler(logging.FileHandler(logpath, 'w')) log.setLevel(logging.DEBUG) - fs = GpgFs(sys.argv[2], sys.argv[1]) + fs = GpgFs(sys.argv[2], sys.argv[3], sys.argv[1]) FUSE(fs, sys.argv[3], foreground=True) diff --git a/gpgstore.py b/gpgstore.py index 568606a..cb2304d 100644 --- a/gpgstore.py +++ b/gpgstore.py @@ -60,3 +60,6 @@ class GpgStore(object): def delete(self, path): os.remove(self.encroot + '/' + path) log.debug('deleted %s' % path) + + def exists(self, path): + return os.path.exists(self.encroot + '/' + path)