<?php /* ----------------------------------------------------------------------- *\ PHP4代码版XDB - (XTreeDB.class.php) ----------------------------------------------------------------------- 作者: 马明练(hightman) (MSN: MingL_Mar@msn.com) (php-QQ群: 17708754) 网站: http://www.hi-php.com 时间: 2007/05/01 (update: 2007/05/29) 版本: 0.1 目的: 取代 cdb/gdbm 快速存取分词词典, 因大部分用户缺少这些基础配件和知识 xdb 改自前身 hdb 为了更好的和C版兼容, 故修改. 目前此版产生了机器字 节序依赖, 故打开词典时会自做检查 功能: 这是一个类似于 cdb/gdbm 的 PHP 代码级数据类库, 通过 key, value 的方 式存取数据, 使用非常简单. 适用于快速根据唯一主键查找数据 效能: 1. 效率高(20万记录以上比php内建的cdb还要快), 经过优化后 35万记录时 树的最大深度为5, 查找效率高,单个文件 2. 文件小(缺省设置下, 基础数据约 100KB, 之后每条记录为 key, value的 总长度+13bytes 4. PHP 代码级, 修改维护方便 5. 提供内建二叉树优化函数, 提供存取结构图绘制接口, 提供遍历接口 6. 数据可快速更新, 而 cdb 是只读的或只写的 缺点: 1. 对于unique key来说, 一经增加不可清除 (可以将value设为空值) 2. 当更新 value 时, 如果新 value 较长则旧的记录直接作废, 长期修改 可能会导致文件有一些无用的膨胀, 这类情况可以调用遍历接口完全重 建整个数据库 3. 由于是 php 代码级的引擎, 性能上比 gdbm 没有什么优势 4. IO操作, 可以考虑将数据文件放到 memfs 上 (linux/bsd) 5. key 最大长度为 240bytes, value 最大长度为 65279 bytes, 整个文件最大为 4G 6. 不可排序和随机分页读取 用法: (主要的方法) 1. 建立类操作句柄, 构造函数: XTreeDB([int mask [, int base ]]) 可选参数(仅针对新建数据有效): mask, base 均为整型数, 其中 mask 是 hash 求模的基数, 建议选一个质数, 大约为总记录数的 1/10 即可. base 是 hash 数据计算的基数, 建议使用默认值. ``h = ((h << 5) + h) ^ c'' $XDB = new XTreeDB; 2. 打开数据文件, Bool Open(string fpath [, string mode]) 必要参数 fpath 为数据文件的路径, 可选参数 mode 的值为 r 或 w, 分别表示只 读或读写方式打开数据库. 成功返回 true, 失败返回 false. 缺省情况下是以只读方式打开, 即 mode 的缺省值为 'r' $XDB->Open('/path/to/dict.xdb'); 或以读写方式打开(新建数据时必须), mode 值为 'w', 此时数据库可读可写, 并锁定写 $XDB->Open('/path/to/dict.xdb', 'w'); 3. 根据 key 读取数据 mixed Get(string key [, bool verbose]) 成功查找到 key 所对应的数据时返回数据内容, 类型为 string 当 key 不存在于数据库中时或产生错误直接返回 false (*注* 当 verbose 被设为 true 时, 则返回一个完整的记录数组, 含 key&value, 仅用于调试目的) $value = $XDB->Get($key); 或 $debug = $XDB->Get($key, true); print_r($debug); 4. 存入数据 bool Put(string key [, string value]) 成功返回 true, 失败或出错返回 false , 必须以读写方式打开才可调用 注意存入的数据目前只支持 string 类型, 有特殊需要可以使用 php 内建的 serialize 将 array 转换 成 string 取出时再用 unserialize() 还原 $result = $XDB->Put($key, $value); 5. 关闭数据库, void Close() $XDB->Close(); 6. 查询文件版本号, string Version() 返回类似 XDB/0.1 之类的格式, 是当前文件的版本号 7. 记录遍历, mixed Next() 返回一条记录key, value 组成的数组, 并将内部指针往后移一位, 可调用 Reset() 重置指针 当没有记录时会返回 false, 典型应用如下 $XDB->Reset(); while ($tmp = $XDB->Next()) { echo "$tmp[key] => $tmp[value]\n"; } 也可用于导出数据库重建新的数据库, 以清理过多的重写导致的文件空档. 8. 遍历指针复位, void Reset() 此函数仅为街拍 Next() 使用 $XDB->Reset(); 9. 优化数据库, 将数据库中的 btree 转换成完全二叉树. void Optimize([int index]) 由于数据库针对 key 进行 hash 分散到 mask 颗二叉树中, 故这里的 index 为 0~[mask-1] 缺省情况下 index 值为 -1 会优化整个数据库, 必须以读写方式打开的数据库才能用该方法 $XDB->Optimize(); 10. 打印分析树, 绘出存贮结构的树状图, void Draw([int index]) 参数 index 同 Optimize() 的参数, 本函数无返回值, 直接将结果 echo 出来, 仅用于调试和观看 分析 $XDB->Draw(0); $XDB->Draw(1); ... \* ----------------------------------------------------------------------- */ // Constant Define define ('XDB_FLOAT_CHECK', 3.14); define ('XDB_HASH_BASE', 0xf422f); define ('XDB_HASH_PRIME', 2047); define ('XDB_VERSION', 34); define ('XDB_TAGNAME', 'XDB'); define ('XDB_MAXKLEN', 0xf0); // Class object Declare class Util_Utils_XTreeDB { // Public var var $fd = false; var $mode = 'r'; var $hash_base = XDB_HASH_BASE; var $hash_prime = XDB_HASH_PRIME; var $version = XDB_VERSION; var $fsize = 0; // Private var $trave_stack = array(); var $trave_index = -1; // Debug test var $_io_times = 0; // Constructor Function function Util_Utils_XTreeDB($base = 0, $prime = 0) { if (0 != $base) $this->hash_base = $base; if (0 != $prime) $this->hash_prime = $prime; } // Open the database: read | write function Open($fpath, $mode = 'r') { // open the file $this->Close(); $newdb = false; if ($mode == 'w') { // write & read only if (!($fd = @fopen($fpath, 'rb+'))) { if (!($fd = @fopen($fpath, 'wb+'))) { trigger_error("XDB::Open(" . basename($fpath) . ",w) failed.", E_USER_WARNING); return false; } // create the header $this->_write_header($fd); // 32 = header, 8 = Pointer $this->fsize = 32 + 8 * $this->hash_prime; $newdb = true; } } else { // read only if (!($fd = @fopen($fpath, 'rb'))) { trigger_error("XDB::Open(" . basename($fpath) . ",r) failed.", E_USER_WARNING); return false; } } // check the header if (!$newdb && !$this->_check_header($fd)) { trigger_error("XDB::Open(" . basename($fpath) . "), invalid xdb format.", E_USER_WARNING); fclose($fd); return false; } // set the variable $this->fd = $fd; $this->mode = $mode; $this->Reset(); // lock the file description until close if ($mode == 'w') flock($this->fd, LOCK_EX); return true; } // Insert Or Update the value function Put($key, $value) { // check the file description if (!$this->fd || $this->mode != 'w') { trigger_error("XDB::Put(), null db handler or readonly.", E_USER_WARNING); return false; } // check the length $klen = strlen($key); $vlen = strlen($value); if (!$klen || $klen > XDB_MAXKLEN) return false; // try to find the old data $rec = $this->_get_record($key); if (isset($rec['vlen']) && ($vlen <= $rec['vlen'])) { // update the old value & length if ($vlen > 0) { fseek($this->fd, $rec['voff'], SEEK_SET); fwrite($this->fd, $value, $vlen); } if ($vlen < $rec['vlen']) { $newlen = $rec['len'] + $vlen - $rec['vlen']; $newbuf = pack('I', $newlen); fseek($this->fd, $rec['poff'] + 4, SEEK_SET); fwrite($this->fd, $newbuf, 4); } return true; } // 构造数据 $new = array('loff' => 0, 'llen' => 0, 'roff' => 0, 'rlen' => 0); if (isset($rec['vlen'])) { $new['loff'] = $rec['loff']; $new['llen'] = $rec['llen']; $new['roff'] = $rec['roff']; $new['rlen'] = $rec['rlen']; } $buf = pack('IIIIC', $new['loff'], $new['llen'], $new['roff'], $new['rlen'], $klen); $buf .= $key . $value; $len = $klen + $vlen + 17; $off = $this->fsize; fseek($this->fd, $off, SEEK_SET); fwrite($this->fd, $buf, $len); $this->fsize += $len; $pbuf = pack('II', $off, $len); fseek($this->fd, $rec['poff'], SEEK_SET); fwrite($this->fd, $pbuf, 8); return true; } // Read the value by key function Get($key, $debug = false) { // check the file description if (!$this->fd) { trigger_error("XDB::Get(), null db handler.", E_USER_WARNING); return false; } $klen = strlen($key); if ($klen == 0 || $klen > XDB_MAXKLEN) return false; // get the data? $rec = $this->_get_record($key); if ($debug) return $rec; if (!isset($rec['vlen']) || $rec['vlen'] == 0) return false; return $rec['value']; } // Read the each key & value // return array(key => xxx, value => xxx) function Next() { // check the file description if (!$this->fd) { trigger_error("XDB::Next(), null db handler.", E_USER_WARNING); return false; } // Traversal the all tree if (!($ptr = array_pop($this->trave_stack))) { do { $this->trave_index++; if ($this->trave_index >= $this->hash_prime) break; $poff = $this->trave_index * 8 + 32; fseek($this->fd, $poff, SEEK_SET); $buf = fread($this->fd, 8); if (strlen($buf) != 8) { $ptr = false; break; } $ptr = unpack('Ioff/Ilen', $buf); } while ($ptr['len'] == 0); } // end the all records? if (!$ptr || $ptr['len'] == 0) return false; // read the record $rec = $this->_tree_get_record($ptr['off'], $ptr['len']); // push the left & right if ($rec['llen'] != 0) { $left = array('off' => $rec['loff'], 'len' => $rec['llen']); array_push($this->trave_stack, $left); } if ($rec['rlen'] != 0) { $right = array('off' => $rec['roff'], 'len' => $rec['rlen']); array_push($this->trave_stack, $right); } // return value return $rec; } // Traversal every tree... & debug to test function Draw($i = -1) { if ($i < 0 || $i >= $this->hash_prime) { $i = 0; $j = $this->hash_prime; } else { $j = $i + 1; } echo "Draw the XDB data [$i ~ $j]. (" . trim($this->Version()) . ")\n\n"; while ($i < $j) { $poff = $i * 8 + 32; fseek($this->fd, $poff, SEEK_SET); $buf = fread($this->fd, 8); if (strlen($buf) != 8) break; $ptr = unpack('Ioff/Ilen', $buf); $this->_cur_depth = 0; $this->_node_num = 0; $this->_draw_node($ptr['off'], $ptr['len']); echo "-------------------------------------------\n"; echo "Tree(xdb) [$i] max_depth: {$this->_cur_depth} nodes_num: {$this->_node_num}\n"; $i++; } } // Reset the inner pointer function Reset() { $this->trave_stack = array(); $this->trave_index = -1; } // Show the version function Version() { $ver = (is_null($this) ? XDB_VERSION : $this->version); $str = sprintf("%s/%d.%d", XDB_TAGNAME, ($ver >> 5), ($ver & 0x1f)); if (!is_null($this)) $str .= " <base={$this->hash_base}, prime={$this->hash_prime}>"; return $str; } // Close the DB function Close() { if (!$this->fd) return; if ($this->mode == 'w') { $buf = pack('I', $this->fsize); fseek($this->fd, 12, SEEK_SET); fwrite($this->fd, $buf, 4); flock($this->fd, LOCK_UN); } fclose($this->fd); $this->fd = false; } // Optimize the tree function Optimize($i = -1) { // check the file description if (!$this->fd || $this->mode != 'w') { trigger_error("XDB::Optimize(), null db handler or readonly.", E_USER_WARNING); return false; } // get the index zone: if ($i < 0 || $i >= $this->hash_prime) { $i = 0; $j = $this->hash_prime; } else { $j = $i + 1; } // optimize every index while ($i < $j) { $this->_optimize_index($i); $i++; } } // optimize a node function _optimize_index($index) { static $cmp = false; $poff = $index * 8 + 32; // save all nodes into array() $this->_sync_nodes = array(); $this->_load_tree_nodes($poff); $count = count($this->_sync_nodes); if ($count < 3) return; // sync the nodes, sort by key first if ($cmp == false) $cmp = create_function('$a,$b', 'return strcmp($a[key],$b[key]);'); usort($this->_sync_nodes, $cmp); $this->_reset_tree_nodes($poff, 0, $count - 1); unset($this->_sync_nodes); } // load tree nodes function _load_tree_nodes($poff) { fseek($this->fd, $poff, SEEK_SET); $buf = fread($this->fd, 8); if (strlen($buf) != 8) return; $tmp = unpack('Ioff/Ilen', $buf); if ($tmp['len'] == 0) return; fseek($this->fd, $tmp['off'], SEEK_SET); $rlen = XDB_MAXKLEN + 17; if ($rlen > $tmp['len']) $rlen = $tmp['len']; $buf = fread($this->fd, $rlen); $rec = unpack('Iloff/Illen/Iroff/Irlen/Cklen', substr($buf, 0, 17)); $rec['off'] = $tmp['off']; $rec['len'] = $tmp['len']; $rec['key'] = substr($buf, 17, $rec['klen']); $this->_sync_nodes[] = $rec; unset($buf); // left if ($rec['llen'] != 0) $this->_load_tree_nodes($tmp['off']); // right if ($rec['rlen'] != 0) $this->_load_tree_nodes($tmp['off'] + 8); } // sync the tree function _reset_tree_nodes($poff, $low, $high) { if ($low <= $high) { $mid = ($low+$high)>>1; $node = $this->_sync_nodes[$mid]; $buf = pack('II', $node['off'], $node['len']); // left $this->_reset_tree_nodes($node['off'], $low, $mid - 1); // right $this->_reset_tree_nodes($node['off'] + 8, $mid + 1, $high); } else { $buf = pack('II', 0, 0); } fseek($this->fd, $poff, SEEK_SET); fwrite($this->fd, $buf, 8); } // Privated Function function _get_index($key) { $l = strlen($key); $h = $this->hash_base; while ($l--) { $h += ($h << 5); $h ^= ord($key[$l]); $h &= 0x7fffffff; } return ($h % $this->hash_prime); } // draw the tree nodes by off & len function _draw_node($off, $len, $rl = 'T', $icon = '', $depth = 0) { if ($rl == 'T') echo '(T) '; else { echo $icon; if ($rl == 'L') { $icon .= ' ┃'; echo ' ┟(L) '; } else { $icon .= ' '; echo ' └(R) '; } } if ($len == 0) { echo "<NULL>\n"; return; } $rec = $this->_tree_get_record($off, $len); echo "{$rec[key]} (vlen={$rec[vlen]}, voff={$rec[voff]})\n"; unset($rec['key'], $rec['value']); // debug used $this->_node_num++; $depth++; if ($depth >= $this->_cur_depth) $this->_cur_depth = $depth; // Left node & Right Node $this->_draw_node($rec['loff'], $rec['llen'], 'L', $icon, $depth); $this->_draw_node($rec['roff'], $rec['rlen'], 'R', $icon, $depth); } // Check XDB Header function _check_header($fd) { fseek($fd, 0, SEEK_SET); $buf = fread($fd, 32); if (strlen($buf) !== 32) return false; $hdr = unpack('a3tag/Cver/Ibase/Iprime/Ifsize/fcheck/a12reversed', $buf); if ($hdr['tag'] != XDB_TAGNAME) return false; // check the fsize $fstat = fstat($fd); if ($fstat['size'] != $hdr['fsize']) return false; // check float? $this->hash_base = $hdr['base']; $this->hash_prime = $hdr['prime']; $this->version = $hdr['ver']; $this->fsize = $hdr['fsize']; return true; } // Write XDB Header function _write_header($fd) { $buf = pack('a3CiiIfa12', XDB_TAGNAME, $this->version, $this->hash_base, $this->hash_prime, 0, XDB_FLOAT_CHECK, ''); fseek($fd, 0, SEEK_SET); fwrite($fd, $buf, 32); } // get the record by first key function _get_record($key) { $this->_io_times = 1; $index = ($this->hash_prime > 1 ? $this->_get_index($key) : 0); $poff = $index * 8 + 32; fseek($this->fd, $poff, SEEK_SET); $buf = fread($this->fd, 8); if (strlen($buf) == 8) $tmp = unpack('Ioff/Ilen', $buf); else $tmp = array('off' => 0, 'len' => 0); return $this->_tree_get_record($tmp['off'], $tmp['len'], $poff, $key); } // get the record by tree function _tree_get_record($off, $len, $poff = 0, $key = '') { if ($len == 0) return (array('poff' => $poff)); $this->_io_times++; // get the data & compare the key data fseek($this->fd, $off, SEEK_SET); $rlen = XDB_MAXKLEN + 17; if ($rlen > $len) $rlen = $len; $buf = fread($this->fd, $rlen); $rec = unpack('Iloff/Illen/Iroff/Irlen/Cklen', substr($buf, 0, 17)); $fkey = substr($buf, 17, $rec['klen']); $cmp = ($key ? strcmp($key, $fkey) : 0); if ($cmp > 0) { // --> right unset($buf); return $this->_tree_get_record($rec['roff'], $rec['rlen'], $off + 8, $key); } else if ($cmp < 0) { // <-- left unset($buf); return $this->_tree_get_record($rec['loff'], $rec['llen'], $off, $key); } else { // found!! $rec['poff'] = $poff; $rec['off'] = $off; $rec['len'] = $len; $rec['voff'] = $off + 17 + $rec['klen']; $rec['vlen'] = $len - 17 - $rec['klen']; $rec['key'] = $fkey; fseek($this->fd, $rec['voff'], SEEK_SET); $rec['value'] = fread($this->fd, $rec['vlen']); return $rec; } } } ?>