Compare commits

...

10 Commits

Author SHA1 Message Date
Jan Philipp Timme 555e0e7696 Add a little more documentation on how to use GPGFS. 2015-12-08 01:20:13 +01:00
Jan Philipp Timme 765338955e Add *.log to .gitignore. 2015-12-08 01:05:14 +01:00
Jan Philipp Timme 4b22f7014e Ported gpgfs to Python 3.5. Whooo! 2015-12-08 01:03:45 +01:00
Jarno Seppänen 295a642522 Clean up empty encrypted directories 2014-05-18 12:40:07 +03:00
Jarno Seppänen a20e459b97 Remove spurious 'in' directory 2014-05-18 12:36:22 +03:00
Jarno Seppänen c6a8ce7daa GPG does zlib compression by default 2014-05-18 12:34:46 +03:00
Jarno Seppänen ec5ba51f98 Fix potential data loss if cleaning old files fails 2014-05-14 22:45:03 +03:00
Jarno Seppänen 079dcec131 Remove obsolete method _write_index 2014-05-14 22:39:35 +03:00
Jarno Seppänen f0c108411b Links to test suites 2014-05-14 22:37:47 +03:00
Jarno Seppänen 6018e19158 Robustness improvements:
* refactor error handling through common transaction() method
* maintain consistent index file on disk in the face of errors
* implement locking for multithreaded use
2014-05-14 22:35:58 +03:00
5 changed files with 241 additions and 247 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
*.pyc
*.log

View File

@ -2,14 +2,37 @@
GPG encryption for synchronized folders.
Usage: gpgfs <gpg_keyid> <encrypted_root> <mountpoint>
gpg_keyid: The identity of the gpg key to use. (See gpg -K)
encrypted_root: Path to the folder in which the encrypted data is stored.
mountpoint: Where to mount the filesystem.
# File structure
gpgfs/a/b
This represents the structure GPGFS will use to store the encrypted data on disk.
gpgfs/index
gpgfs/a/b
# Dependencies
pip install python-gnupg fusepy
GPGFS needs Python 3 and FUSE support to run.
You can install the Python dependencies using pip (using a virtualenv may be helpful):
pip install -r requirements.txt
# Test suites
* ntfs-3g
http://sourceforge.net/p/ntfs-3g/pjd-fstest/ci/master/tree/
* tuxera
http://www.tuxera.com/community/posix-test-suite/
# BSD licensed

353
gpgfs.py
View File

@ -8,10 +8,12 @@ import sys
import logging
import struct
import time
from cStringIO import StringIO
from io import BytesIO
import gpgstore
from contextlib import contextmanager
from threading import Lock
magic = 'GPGFS1\n'
magic = b'GPGFS1\n'
log = logging.getLogger('gpgfs')
@ -20,56 +22,68 @@ class Entry:
Filesystem object, either file or directory.
'''
def __init__(self, **kwargs):
for k,v in kwargs.iteritems():
for k,v in kwargs.items():
setattr(self, k, v)
# entry types:
ENT_FILE = 0
ENT_DIR = 1
def read_index(store, path):
data = store.get(path)
buf = StringIO(data)
if buf.read(len(magic)) != magic:
raise IOError, 'index parse error: %s' % path
if not store.exists(path):
now = time.time()
root = Entry(children={}, nlink=3, size=0,
mode=stat.S_IFDIR | 0o755,
mtime=now, ctime=now)
write_index(store, path, root)
log.info('created %s', path)
return root
data = store.get(path, format=gpgstore.FMT_GPG)
buf = BytesIO(data)
temp = buf.read(len(magic))
if temp != magic:
raise IOError('index parse error: %s' % path)
read_atom(buf)
root = Entry(**read_dict(buf))
return root
def write_index(store, path, root):
buf = StringIO()
buf = BytesIO()
buf.write(magic)
header = ''
header = b''
write_atom(buf, header)
write_dict(buf, root)
store.put(buf.getvalue(), path=path)
store.put(buf.getvalue(), path=path, format=gpgstore.FMT_GPG)
def write_dict(fd, dct):
# breadth-first
children = []
buf = StringIO()
buf = BytesIO()
if not isinstance(dct, dict):
dct = dct.__dict__
for key in dct:
write_atom(buf, key.encode('utf8'))
write_atom(buf, key.encode('utf-8'))
val = dct[key]
if isinstance(val, dict):
buf.write('D')
buf.write(b'D')
children.append(val)
elif isinstance(val, Entry):
buf.write('E')
buf.write(b'E')
children.append(val)
elif isinstance(val, (int, long)):
buf.write('I')
elif isinstance(val, (int)):
if val < 2**32:
buf.write(b'I')
buf.write(struct.pack('<I', val))
elif isinstance(val, str):
buf.write('S')
write_atom(buf, val)
elif isinstance(val, unicode):
buf.write('U')
write_atom(buf, val.encode('utf8'))
else:
raise TypeError, type(val)
buf.write(b'L')
buf.write(struct.pack('<Q', val))
elif isinstance(val, float):
buf.write(b'F')
buf.write(struct.pack('<d', val))
elif isinstance(val, bytes):
buf.write(b'')
write_atom(buf, val)
elif isinstance(val, str):
buf.write(b'S')
write_atom(buf, val.encode('utf-8'))
else:
raise TypeError(type(val))
write_atom(fd, buf.getvalue())
for c in children:
write_dict(fd, c)
@ -78,21 +92,23 @@ def read_dict(fd):
dct = {}
buf = read_atom(fd)
buflen = len(buf)
buf = StringIO(buf)
buf = BytesIO(buf)
while buf.tell() < buflen:
key = read_atom(buf).decode('utf8')
key = read_atom(buf).decode('utf-8')
tag = buf.read(1)
if tag == 'D': val = read_dict(fd)
elif tag == 'E': val = Entry(**read_dict(fd))
elif tag == 'I': val = struct.unpack('<I', buf.read(4))[0]
elif tag == 'S': val = read_atom(buf)
elif tag == 'U': val = read_atom(buf).decode('utf8')
else: raise TypeError, tag
if tag == b'D': val = read_dict(fd)
elif tag == b'E': val = Entry(**read_dict(fd))
elif tag == b'I': val = struct.unpack('<I', buf.read(4))[0]
elif tag == b'L': val = struct.unpack('<Q', buf.read(8))[0]
elif tag == b'F': val = struct.unpack('<d', buf.read(8))[0]
elif tag == b'': val = read_atom(buf)
elif tag == b'S': val = read_atom(buf).decode('utf-8')
else: raise TypeError(tag)
dct[key] = val
return dct
def write_atom(fd, atom):
assert isinstance(atom, str)
assert isinstance(atom, bytes)
fd.write(struct.pack('<I', len(atom)))
fd.write(atom)
@ -111,9 +127,12 @@ class LoggingMixIn:
try:
ret = getattr(self, op)(path, *args)
return ret
except OSError, e:
except OSError as e:
ret = str(e)
raise
except:
log.exception('unhandled error in %s:', op)
raise
finally:
rtxt = repr(ret)
if op=='read':
@ -133,21 +152,11 @@ class GpgFs(LoggingMixIn, Operations):
#self.cache = cache
self.store = gpgstore.GpgStore(self.encroot, keyid)
self.index_path = 'index'
if os.path.exists(self.encroot + '/' + self.index_path):
self.root = read_index(self.store, self.index_path)
else:
self.root = Entry(type=ENT_DIR, children={},
st_mode=0755,
st_mtime=int(time.time()),
st_ctime=int(time.time()))
self._write_index()
log.info('created %s', self.index_path)
self.txlock = Lock()
self.fd = 0
self._clear_write_cache()
def _write_index(self):
write_index(self.store, self.index_path, self.root)
def _find(self, path, parent=False):
assert path.startswith('/')
if path == '/':
@ -171,43 +180,56 @@ class GpgFs(LoggingMixIn, Operations):
self.write_len = 0
self.write_dirty = False
@contextmanager
def transaction(self):
paths = {'old': None, 'new': None}
def putx(data, old_path = None):
paths['new'] = self.store.put(data)
paths['old'] = old_path
return paths['new']
with self.txlock:
try:
yield putx
# commit
write_index(self.store, self.index_path, self.root)
except:
# rollback
try:
log.warning('starting rollback')
self.root = read_index(self.store, self.index_path)
if paths['new']:
self.store.delete(paths['new'])
log.warning('rollback done')
except:
log.exception('rollback failed')
raise
if paths['old']:
self.store.delete(paths['old'])
def chmod(self, path, mode):
# sanitize mode (clear setuid/gid/sticky bits)
mode &= 0777
mode &= 0o777
with self.transaction():
ent = self._find(path)
if ent.type == ENT_DIR:
prev_mode = ent.st_mode
ent.st_mode = mode
try:
self._write_index()
except:
ent.st_mode = prev_mode
raise
else:
encpath = self.encroot + '/' + ent.path
os.chmod(encpath, mode)
ent.mode = mode | (ent.mode & 0o170000)
def chown(self, path, uid, gid):
raise FuseOSError(errno.ENOSYS)
def create(self, path, mode):
dir, path = self._find(path, parent=True)
if path in dir.children:
mode &= 0o777
mode |= stat.S_IFREG
with self.transaction() as putx:
parent, name = self._find(path, parent=True)
if name in parent.children:
raise FuseOSError(errno.EEXIST)
# FIXME mode
encpath = self.store.put('')
prev_mtime = dir.st_mtime
dir.children[path] = Entry(type=ENT_FILE, path=encpath, st_size=0)
now = time.time()
encpath = putx('')
parent.children[name] = Entry(mode=mode, encpath=encpath, size=0,
nlink=1, ctime=now, mtime=now,
encformat=gpgstore.FMT_GPG)
parent.mtime = now
log.debug('new path %s => %s', path, encpath)
dir.st_mtime = int(time.time())
try:
self._write_index()
except:
try: self.store.delete(encpath)
except: pass
del dir.children[path]
dir.st_mtime = prev_mtime
raise
self.fd += 1
return self.fd
@ -215,21 +237,12 @@ class GpgFs(LoggingMixIn, Operations):
if not self.write_dirty:
log.debug('nothing to flush')
return 0
buf = ''.join(self.write_buf)
with self.transaction() as putx:
buf = b''.join(self.write_buf)
self.write_buf = [buf]
ent = self._find(self.write_path)
prev_size = ent.st_size
prev_path = ent.path
ent.st_size = len(buf)
ent.path = self.store.put(buf)
try:
self._write_index()
except:
self.store.delete(ent.path)
ent.st_size = prev_size
ent.path = prev_path
raise
self.store.delete(prev_path)
ent.size = len(buf)
ent.encpath = putx(buf, ent.encpath)
self.write_dirty = False
log.debug('flushed %d bytes to %s', len(buf), self.write_path)
return 0
@ -239,19 +252,12 @@ class GpgFs(LoggingMixIn, Operations):
return 0
def getattr(self, path, fh = None):
# don't do full blown transaction
with self.txlock:
ent = self._find(path)
if ent.type == ENT_DIR:
return dict(st_mode = stat.S_IFDIR | ent.st_mode,
st_size = len(ent.children),
st_ctime = ent.st_ctime, st_mtime = ent.st_mtime,
st_atime = 0, st_nlink = 3)
# ensure st_size is up-to-date
self.flush(path, 0)
encpath = self.encroot + '/' + ent.path
s = os.stat(encpath)
return dict(st_mode = s.st_mode, st_size = ent.st_size,
st_atime = s.st_atime, st_mtime = s.st_mtime,
st_ctime = s.st_ctime, st_nlink = s.st_nlink)
return dict(st_mode = ent.mode, st_size = ent.size,
st_ctime = ent.ctime, st_mtime = ent.mtime,
st_atime = 0, st_nlink = ent.nlink)
def getxattr(self, path, name, position = 0):
raise FuseOSError(errno.ENODATA) # ENOATTR
@ -260,21 +266,16 @@ class GpgFs(LoggingMixIn, Operations):
return []
def mkdir(self, path, mode):
dir, path = self._find(path, parent=True)
if path in dir.children:
mode &= 0o777
mode |= stat.S_IFDIR
with self.transaction():
parent, name = self._find(path, parent=True)
if name in parent.children:
raise FuseOSError(errno.EEXIST)
prev_mtime = dir.st_mtime
dir.children[path] = Entry(type=ENT_DIR, children={},
st_mode=(mode & 0777),
st_mtime=int(time.time()),
st_ctime=int(time.time()))
dir.st_mtime = int(time.time())
try:
self._write_index()
except:
del dir.children[path]
dir.st_mtime = prev_mtime
raise
now = time.time()
parent.children[name] = Entry(children={}, mode=mode, nlink=2,
size=0, mtime=now, ctime=now)
parent.mtime = now
def open(self, path, flags):
return 0
@ -282,13 +283,13 @@ class GpgFs(LoggingMixIn, Operations):
def read(self, path, size, offset, fh):
self.flush(path, 0)
ent = self._find(path)
assert ent.type == ENT_FILE
data = self.store.get(ent.path)
assert ent.mode & stat.S_IFREG
data = self.store.get(ent.encpath, format=ent.encformat)
return data[offset:offset + size]
def readdir(self, path, fh):
dir = self._find(path)
return ['.', '..'] + list(dir.children)
dirent = self._find(path)
return ['.', '..'] + list(dirent.children)
def readlink(self, path):
raise FuseOSError(errno.ENOSYS)
@ -301,53 +302,38 @@ class GpgFs(LoggingMixIn, Operations):
self._clear_write_cache()
if new.startswith(old):
raise FuseOSError(errno.EINVAL)
with self.transaction():
old_dir, old_name = self._find(old, parent=True)
if old_name not in old_dir.children:
raise FuseOSError(errno.ENOENT)
new_dir, new_name = self._find(new, parent=True)
prev_ent = new_dir.children.get(new_name)
if prev_ent:
if prev_ent.type == ENT_DIR:
if old_dir[old_name].type != ENT_DIR:
old_ent = old_dir.children[old_name]
new_ent = new_dir.children.get(new_name)
if new_ent:
if new_ent.mode & stat.S_IFDIR:
if not old_ent.mode & stat.S_IFDIR:
raise FuseOSError(errno.EISDIR)
if prev_ent.children:
if new_ent.children:
raise FuseOSError(errno.ENOTEMPTY)
elif old_dir[old_name].type == ENT_DIR:
elif old_ent.mode & stat.S_IFDIR:
raise FuseOSError(errno.ENOTDIR)
prev_old_mtime = old_dir.st_mtime
prev_new_mtime = new_dir.st_mtime
new_dir.children[new_name] = old_dir.children.pop(old_name)
old_dir.st_mtime = new_dir.st_mtime = int(time.time())
try:
self._write_index()
except:
old_dir.children[old_name] = new_dir.children.pop(new_name)
if prev_ent:
new_dir.children[new_name] = prev_ent
old_dir.st_mtime = prev_old_mtime
new_dir.st_mtime = prev_new_mtime
raise
if prev_ent and prev_ent.type == ENT_FILE:
os.remove(self.encroot + '/' + prev_ent.path)
old_dir.mtime = new_dir.mtime = time.time()
if new_ent != None and new_ent.mode & stat.S_IFREG:
self.store.delete(new_ent.encpath)
def rmdir(self, path):
parent, path = self._find(path, parent=True)
if path not in parent.children:
with self.transaction():
parent, name = self._find(path, parent=True)
if name not in parent.children:
raise FuseOSError(errno.ENOENT)
ent = parent.children[path]
if ent.type != ENT_DIR:
ent = parent.children[name]
if not ent.mode & stat.S_IFDIR:
raise FuseOSError(errno.ENOTDIR)
if ent.children:
raise FuseOSError(errno.ENOTEMPTY)
prev_mtime = parent.st_mtime
del parent.children[path]
parent.st_mtime = int(time.time())
try:
self._write_index()
except:
parent.children[path] = ent
parent.st_mtime = prev_mtime
raise
del parent.children[name]
parent.mtime = time.time()
def setxattr(self, path, name, value, options, position = 0):
raise FuseOSError(errno.ENOSYS)
@ -361,69 +347,42 @@ class GpgFs(LoggingMixIn, Operations):
def truncate(self, path, length, fh = None):
self.flush(path, 0)
self._clear_write_cache()
with self.transaction() as putx:
ent = self._find(path)
if length == 0:
buf = ''
buf = b''
else:
buf = self.store.get(ent.path)
buf = self.store.get(ent.encpath, format=ent.encformat)
buf = buf[:length]
prev_size = ent.st_size
prev_path = ent.path
ent.st_size = length
ent.path = self.store.put(buf)
try:
self._write_index()
except:
os.remove(ent.path)
ent.st_size = prev_size
ent.path = prev_path
raise
self.store.delete(prev_path)
ent.encpath = putx(buf, ent.encpath)
ent.size = length
def unlink(self, path):
with self.transaction():
if self.write_path == path:
# no need to flush afterwards
self._clear_write_cache()
dir, name = self._find(path, parent=True)
if name not in dir.children:
parent, name = self._find(path, parent=True)
if name not in parent.children:
raise FuseOSError(errno.ENOENT)
ent = dir.children[name]
encpath = self.encroot + '/' + ent.path
del dir.children[name]
prev_mtime = dir.st_mtime
dir.st_mtime = int(time.time())
try:
self._write_index()
except:
dir.children[name] = ent
dir.st_mtime = prev_mtime
raise
os.remove(encpath)
ent = parent.children.pop(name)
parent.mtime = time.time()
self.store.delete(ent.encpath)
def utimens(self, path, times = None):
ent = self._find(path)
if ent.type == ENT_DIR:
prev_mtime = ent.st_mtime
if times is None:
ent.st_mtime = int(time.time())
mtime = time.time()
else:
ent.st_mtime = times[1]
try:
self._write_index()
except:
ent.st_mtime = prev_mtime
raise
else:
# flush may mess with mtime
self.flush(path, 0)
encpath = self.encroot + '/' + ent.path
os.utime(encpath, times)
mtime = times[1]
with self.transaction():
ent = self._find(path)
ent.mtime = mtime
def write(self, path, data, offset, fh):
ent = self._find(path)
if path != self.write_path:
self.flush(self.write_path, None)
buf = self.store.get(ent.path)
ent = self._find(path)
buf = self.store.get(ent.encpath, format=ent.encformat)
self.write_buf = [buf]
self.write_len = len(buf)
self.write_path = path
@ -431,7 +390,7 @@ class GpgFs(LoggingMixIn, Operations):
self.write_buf.append(data)
self.write_len += len(data)
else:
buf = ''.join(self.write_buf)
buf = b''.join(self.write_buf)
buf = buf[:offset] + data + buf[offset + len(data):]
self.write_buf = [buf]
self.write_len = len(buf)

View File

@ -2,37 +2,38 @@
import os
import gnupg
from binascii import hexlify
import zlib
import errno
import logging
log = logging.getLogger('gpgfs')
FMT_GPG = 0
class GpgStore(object):
def __init__(self, encroot, keyid):
self.encroot = encroot
self.keyid = keyid
self.gpg = gnupg.GPG()
def put(self, data, path=None):
def put(self, data, path=None, format=FMT_GPG):
assert format == FMT_GPG
if not path:
path = hexlify(os.urandom(20))
path = hexlify(os.urandom(20)).decode('utf-8')
path = path[:2] + '/' + path[2:]
encdir = self.encroot + '/' + path[:2]
if not os.path.exists(encdir):
os.mkdir(encdir, 0755)
data = zlib.compress(data, 1)
os.mkdir(encdir, 0o755)
res = self.gpg.encrypt(data, self.keyid, armor=False)
if not res.ok:
log.error("encryption failed (keyid %s), %s: %s",
self.keyid, res.status, path)
raise OSError(errno.EIO)
try:
with file(self.encroot + '/' + path + '.tmp', 'w') as fd:
with open(self.encroot + '/' + path + '.tmp', 'wb') as fd:
fd.write(res.data)
os.rename(self.encroot + '/' + path + '.tmp',
self.encroot + '/' + path)
except IOError, err:
except IOError as err:
log.error("write failed: %s: %s", path, str(err))
raise OSError(err.errno)
finally:
@ -41,10 +42,11 @@ class GpgStore(object):
log.debug('encrypted %s' % path)
return path
def get(self, path):
def get(self, path, format=FMT_GPG):
assert format == FMT_GPG
try:
data = file(self.encroot + '/' + path).read()
except OSError, err:
data = open(self.encroot + '/' + path, 'rb').read()
except OSError as err:
log.error("read failed: %s: %s", path, str(err))
raise
if not data:
@ -53,10 +55,17 @@ class GpgStore(object):
if not res.ok:
log.error("decryption failed, %s: %s", res.status, path)
raise OSError(errno.EIO)
data = zlib.decompress(res.data)
log.debug('decrypted %s' % path)
return data
return res.data
def delete(self, path):
os.remove(self.encroot + '/' + path)
if len(path) >= 3 and path[2] == '/':
try:
os.rmdir(self.encroot + '/' + path[:2])
except OSError:
pass
log.debug('deleted %s' % path)
def exists(self, path):
return os.path.exists(self.encroot + '/' + path)

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
python-gnupg
fusepy