FastDFS非常适合存储大量的小文件,遗憾的是本身不支持自定义文件名,文件名是存储成功以后根据存储位置生成的一个file_id。很多应用场景不得不使用自定义文件名,在不修改其源码的情况下,可以在存储客户端fdfs_client增加一个用来存储自定义文件名和fastdfs的file_id之间的映射关系的数据库间接实现自定义文件名的存取和访问,在这里我们选用了reids。顺便说一下,淘宝也有一个类似于FastDFS的文件存储系统TFS,对于自定义文件名,它是用mysql来存储映射关系的,我认为在高并发访问下mysql本身就是瓶颈,因此在这个方案中采用了redis。
准备工作:
fastdfs环境安装...略...(官方:https://code.google.com/p/fastdfs/)
redis环境安装...略...(官方:http://redis.io/)
用python实现,因此需要安装fastdfs的python客户端(下载:https://fastdfs.googlecode.com/files/fdfs_client-py-1.2.6.tar.gz)
python的redis客户端,到https://pypi.python.org/pypi/redis下载
主文件:fastdfsfun.py
# -*- coding: utf-8 -*-
import setting
from fdfs_client.client import *
from fdfs_client.exceptions import *
from fdfs_client.connection import *
import redis
import time
import logging
import random
logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.DEBUG)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
class RedisError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class fastdfsClient(Fdfs_client):
def __init__(self):
self.tracker_pool = ConnectionPool(**setting.fdfs_tracker)
self.timeout = setting.fdfs_tracker['timeout']
return None
def __del__(self):
try:
self.pool.destroy()
self.pool = None
except:
pass
class fastdfs(object):
def __init__(self):
'''
conf_file:配置文件
'''
self.fdfs_client = fastdfsClient()
self.fdfs_redis = []
for i in setting.fdfs_redis_dbs:
self.fdfs_redis.append(redis.Redis(host=i[0], port=i[1], db=i[2]))
def store_by_buffer(self,buf,filename=None,file_ext_name = None):
'''
buffer存储文件
参数:
filename:自定义文件名,如果不指定,将远程file_id作为文件名
file_ext_name:文件扩展名(可选),如果不指定,将根据自定义文件名智能判断
返回值:
{
'group':组名,
'file_id':不含组名的文件ID,
'size':文件尺寸,
'upload_time':上传时间
}
'''
if filename and random.choice(self.fdfs_redis).exists(filename):
logger.info('File(%s) exists.'%filename)
return random.choice(self.fdfs_redis).hgetall(filename)
t1 = time.time()
# try:
ret_dict = self.fdfs_client.upload_by_buffer(buf,file_ext_name)
# except Exception,e:
# logger.error('Error occurred while uploading: %s'%e.message)
# return None
t2 = time.time()
logger.info('Upload file(%s) by buffer, time consume: %fs' % (filename,(t2 - t1)))
for key in ret_dict:
logger.debug('[+] %s : %s' % (key, ret_dict[key]))
stored_filename = ret_dict['Remote file_id']
stored_filename_without_group = stored_filename[stored_filename.index('/')+1:]
if not filename:
filename =stored_filename_without_group
vmp = {'group':ret_dict['Group name'],'file_id':stored_filename_without_group,'size':ret_dict['Uploaded size'],'upload_time':int(time.time()*1000)}
try:
for i in self.fdfs_redis:
if not i.hmset(filename,vmp):
raise RedisError('Save Failure')
logger.info('Store file(%s) by buffer successful' % filename)
except Exception,e:
logger.error('Save info to Redis failure. rollback...')
try:
ret_dict = self.fdfs_client.delete_file(stored_filename)
except Exception,e:
logger.error('Error occurred while deleting: %s'%e.message)
return None
return vmp
def remove(self,filename):
'''
删除文件,
filename是用户自定义文件名
return True|False
'''
fileinfo = random.choice(self.fdfs_redis).hgetall(filename)
stored_filename = '%s/%s'%(fileinfo['group'],fileinfo['file_id'])
try:
ret_dict = self.fdfs_client.delete_file(stored_filename)
logger.info('Remove stored file successful')
except Exception,e:
logger.error('Error occurred while deleting: %s'%e.message)
return False
for i in self.fdfs_redis:
if not i.delete(filename):
logger.error('Remove fileinfo in redis failure')
logger.info('%s removed.'%filename)
return True
def download(self,filename):
'''
下载文件
返回二进制
'''
finfo = self.getInfo(filename)
if finfo:
ret = self.fdfs_client.download_to_buffer('%s/%s'%(finfo['group'],finfo['file_id']))
return ret['Content']
else:
logger.debug('%s is not exists'%filename)
return None
def list(self,pattern='*'):
'''
列出文件列表
'''
return random.choice(self.fdfs_redis).keys(pattern)
def getInfo(self,filename):
'''
获得文件信息
return:{
'group':组名,
'file_id':不含组名的文件ID,
'size':文件尺寸,
'upload_time':上传时间
}
'''
return random.choice(self.fdfs_redis).hgetall(filename)
fastdfs对象就是封装了redis支持的fastdfs client
其中引入的一个模块setting是配置fastdfs环境的。
setting.py
# -*- coding: utf-8 -*-
#fastdfs tracker, multiple tracker supported
fdfs_tracker = {
'host_tuple':('192.168.2.233','192.168.2.234'),
'port':22122,
'timeout':30,
'name':'Tracker Pool'
}
#fastdfs meta db, multiple redisdb supported
fdfs_redis_dbs = (
('192.168.2.233',6379,0),
('192.168.2.233',6379,1)
)
支持多个tracker和多个redis db,多个redis db是同步写入文件信息的,防止单点故障。