5-27 3,195 views
最近往 HDFS 上传了一个 2.8 GB 的视频,发现用原始的 read 方式读取时,会把整个视频一次性加载到内存,速度非常慢。因此改用 WebHDFS REST API 提供的按文件偏移量(offset)分段读取的方式。
# https://github.com/xaviered/php-WebHDFS/blob/master/README.md#status-of-a-filedirectory
$ composer require simpleenergy/php-webhdfs
/**
 * Stream a file stored in HDFS to the HTTP client in fixed-size chunks.
 *
 * Reads the `filename` request parameter, detects a MIME type from a short
 * leading sample of the file, asks rangeHeader() for the starting offset, and
 * then echoes the file 1 MB at a time through WebHDFS::open() until a read
 * returns a falsy value.
 *
 * NOTE(review): the end offset returned by rangeHeader() is discarded, so the
 * loop keeps streaming past the requested range even though rangeHeader() has
 * already sent a Content-Length for that range only — confirm and bound the
 * loop by the end offset.
 */
public function read_hdfs() {
$filename = $this->request->param('filename');
// NOTE(review): the constructor arguments are missing from this listing
// (the statement is cut off after the opening parenthesis) — restore them
// from the original source.
$hdfs = new \org\apache\hadoop\WebHDFS(
// The file status is decoded into an array; its ['FileStatus']['length'] entry
// is used below as the total size.
$file_status = json_decode($hdfs->getFileStatus($filename),TRUE);
// Fetch a small sample (presumably offset 0, length 100 — TODO confirm the
// argument order of open()) that is only used for MIME type detection.
$response_ext = $hdfs->open($filename, 0,100);
$finfo = new \finfo(FILEINFO_MIME_TYPE);
// Despite its name, $ext holds the detected MIME type, not a file extension.
$ext = $finfo->buffer($response_ext);
header( 'Content-Type:'.$ext);
// rangeHeader() returns [$start, $end]; only $start is kept here.
list($start,) = $this->rangeHeader($file_status['FileStatus']['length']);
$length = 1024 * 1024;// 1 MB loaded per iteration
while (1) {
// $start is the file offset
$response = $hdfs->open($filename, $start,$length);
// Stop on any falsy response.
// NOTE(review): a chunk consisting of the single character "0" is also falsy.
if (!$response) break;
echo $response;
// Advance the offset by one chunk for the next read.
$start += $length;
/**
 * Negotiate an HTTP byte range for a resource of $size bytes and send the
 * matching response headers.
 *
 * Behaviour:
 *  - Always advertises range support with "Accept-Ranges: bytes".
 *  - No Range header, or one that is syntactically malformed: the whole
 *    resource is selected and no partial-content headers are sent.
 *  - "bytes=a-b", "bytes=a-" and the suffix form "bytes=-n" (the last n
 *    bytes) are honoured; the end is clamped to the last byte of the resource
 *    and a 206 status plus Content-Range are sent.
 *  - Multiple ranges ("a-b,c-d") and ranges that select nothing are answered
 *    with 416 and "Content-Range: bytes *" followed by "/$size", after which
 *    the script terminates so that no later header can overwrite the status.
 *
 * @param int $size Total size of the resource in bytes.
 *
 * @return int[] Two-element list [$start, $end] of inclusive byte offsets to
 *               send. For an empty resource this is [0, -1].
 */
public function rangeHeader($size = 0) {
    $size = (int) $size;
    $start = 0;         // First byte to send
    $end = $size - 1;   // Last byte to send (inclusive)
    $partial = false;   // Becomes true once a valid single range is accepted

    // The value of Accept-Ranges is a range unit, not a byte span.
    header('Accept-Ranges: bytes');

    if (isset($_SERVER['HTTP_RANGE'])) {
        $rangeHeader = trim((string) $_SERVER['HTTP_RANGE']);
        $unsatisfiable = false;

        if (strpos($rangeHeader, ',') !== false) {
            // Multipart responses are not implemented, so refuse multi-range requests.
            $unsatisfiable = true;
        } elseif (
            preg_match('/^bytes=(\d*)-(\d*)$/i', $rangeHeader, $parts) === 1
            && ($parts[1] !== '' || $parts[2] !== '')
        ) {
            if ($parts[1] === '') {
                // Suffix form "-n": the last n bytes of the resource.
                $c_start = max(0, $size - (int) $parts[2]);
                $c_end = $end;
            } else {
                $c_start = (int) $parts[1];
                // An open-ended or oversized end is clamped to the last byte.
                $c_end = ($parts[2] === '') ? $end : min((int) $parts[2], $end);
            }

            // Covers a start beyond the end of the resource, a reversed range,
            // "-0", and any range on an empty resource.
            if ($c_start > $c_end) {
                $unsatisfiable = true;
            } else {
                $start = $c_start;
                $end = $c_end;
                $partial = true;
            }
        }
        // Any other (malformed) header is ignored and the full resource is served.

        if ($unsatisfiable) {
            header('HTTP/1.1 416 Requested Range Not Satisfiable');
            header("Content-Range: bytes */$size");
            exit;
        }
    }

    if ($partial) {
        header('HTTP/1.1 206 Partial Content');
        // Notify the client of the byte range that will be output.
        header("Content-Range: bytes $start-$end/$size");
    }

    $length = $end - $start + 1; // Number of bytes that will be sent
    header("Content-Length: $length");

    return [$start, $end];
}