XTreeDB.class.php
16.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
<?php
/* ----------------------------------------------------------------------- *\
PHP4代码版XDB - (XTreeDB.class.php)
-----------------------------------------------------------------------
作者: 马明练(hightman) (MSN: MingL_Mar@msn.com) (php-QQ群: 17708754)
网站: http://www.hi-php.com
时间: 2007/05/01 (update: 2007/05/29)
版本: 0.1
目的: 取代 cdb/gdbm 快速存取分词词典, 因大部分用户缺少这些基础配件和知识
xdb 改自前身 hdb 为了更好的和C版兼容, 故修改. 目前此版产生了机器字
节序依赖, 故打开词典时会自做检查
功能:
这是一个类似于 cdb/gdbm 的 PHP 代码级数据类库, 通过 key, value 的方
式存取数据, 使用非常简单.
适用于快速根据唯一主键查找数据
效能:
1. 效率高(20万记录以上比php内建的cdb还要快), 经过优化后 35万记录时
树的最大深度为5, 查找效率高,单个文件
2. 文件小(缺省设置下, 基础数据约 100KB, 之后每条记录为 key, value的
总长度+13bytes
4. PHP 代码级, 修改维护方便
5. 提供内建二叉树优化函数, 提供存取结构图绘制接口, 提供遍历接口
6. 数据可快速更新, 而 cdb 是只读的或只写的
缺点:
1. 对于unique key来说, 一经增加不可清除 (可以将value设为空值)
2. 当更新 value 时, 如果新 value 较长则旧的记录直接作废, 长期修改
可能会导致文件有一些无用的膨胀, 这类情况可以调用遍历接口完全重
建整个数据库
3. 由于是 php 代码级的引擎, 性能上比 gdbm 没有什么优势
4. IO操作, 可以考虑将数据文件放到 memfs 上 (linux/bsd)
5. key 最大长度为 240bytes, value 最大长度为 65279 bytes, 整个文件最大为 4G
6. 不可排序和随机分页读取
用法: (主要的方法)
1. 建立类操作句柄, 构造函数: XTreeDB([int mask [, int base ]])
可选参数(仅针对新建数据有效): mask, base 均为整型数, 其中
mask 是 hash 求模的基数, 建议选一个质数, 大约为总记录数的 1/10 即可.
base 是 hash 数据计算的基数, 建议使用默认值. ``h = ((h << 5) + h) ^ c''
$XDB = new XTreeDB;
2. 打开数据文件, Bool Open(string fpath [, string mode])
必要参数 fpath 为数据文件的路径, 可选参数 mode 的值为 r 或 w, 分别表示只
读或读写方式打开数据库. 成功返回 true, 失败返回 false.
缺省情况下是以只读方式打开, 即 mode 的缺省值为 'r'
$XDB->Open('/path/to/dict.xdb');
或以读写方式打开(新建数据时必须), mode 值为 'w', 此时数据库可读可写, 并锁定写
$XDB->Open('/path/to/dict.xdb', 'w');
3. 根据 key 读取数据 mixed Get(string key [, bool verbose])
成功查找到 key 所对应的数据时返回数据内容, 类型为 string
当 key 不存在于数据库中时或产生错误直接返回 false
(*注* 当 verbose 被设为 true 时, 则返回一个完整的记录数组, 含 key&value, 仅用于调试目的)
$value = $XDB->Get($key);
或
$debug = $XDB->Get($key, true); print_r($debug);
4. 存入数据 bool Put(string key [, string value])
成功返回 true, 失败或出错返回 false , 必须以读写方式打开才可调用
注意存入的数据目前只支持 string 类型, 有特殊需要可以使用 php 内建的 serialize 将 array 转换
成 string 取出时再用 unserialize() 还原
$result = $XDB->Put($key, $value);
5. 关闭数据库, void Close()
$XDB->Close();
6. 查询文件版本号, string Version()
返回类似 XDB/0.1 之类的格式, 是当前文件的版本号
7. 记录遍历, mixed Next()
返回一条记录key, value 组成的数组, 并将内部指针往后移一位, 可调用 Reset() 重置指针
当没有记录时会返回 false, 典型应用如下
$XDB->Reset();
while ($tmp = $XDB->Next())
{
echo "$tmp[key] => $tmp[value]\n";
}
也可用于导出数据库重建新的数据库, 以清理过多的重写导致的文件空档.
8. 遍历指针复位, void Reset()
此函数仅为街拍 Next() 使用
$XDB->Reset();
9. 优化数据库, 将数据库中的 btree 转换成完全二叉树. void Optimize([int index])
由于数据库针对 key 进行 hash 分散到 mask 颗二叉树中, 故这里的 index 为 0~[mask-1]
缺省情况下 index 值为 -1 会优化整个数据库, 必须以读写方式打开的数据库才能用该方法
$XDB->Optimize();
10. 打印分析树, 绘出存贮结构的树状图, void Draw([int index])
参数 index 同 Optimize() 的参数, 本函数无返回值, 直接将结果 echo 出来, 仅用于调试和观看
分析
$XDB->Draw(0);
$XDB->Draw(1);
...
\* ----------------------------------------------------------------------- */
// Constant Define
define ('XDB_FLOAT_CHECK', 3.14);
define ('XDB_HASH_BASE', 0xf422f);
define ('XDB_HASH_PRIME', 2047);
define ('XDB_VERSION', 34);
define ('XDB_TAGNAME', 'XDB');
define ('XDB_MAXKLEN', 0xf0);
// Class object Declare
class Util_Utils_XTreeDB
{
// Public var
var $fd = false;
var $mode = 'r';
var $hash_base = XDB_HASH_BASE;
var $hash_prime = XDB_HASH_PRIME;
var $version = XDB_VERSION;
var $fsize = 0;
// Private
var $trave_stack = array();
var $trave_index = -1;
// Debug test
var $_io_times = 0;
// Constructor Function
function Util_Utils_XTreeDB($base = 0, $prime = 0)
{
if (0 != $base) $this->hash_base = $base;
if (0 != $prime) $this->hash_prime = $prime;
}
// Open the database: read | write
function Open($fpath, $mode = 'r')
{
// open the file
$this->Close();
$newdb = false;
if ($mode == 'w')
{
// write & read only
if (!($fd = @fopen($fpath, 'rb+')))
{
if (!($fd = @fopen($fpath, 'wb+')))
{
trigger_error("XDB::Open(" . basename($fpath) . ",w) failed.", E_USER_WARNING);
return false;
}
// create the header
$this->_write_header($fd);
// 32 = header, 8 = Pointer
$this->fsize = 32 + 8 * $this->hash_prime;
$newdb = true;
}
}
else
{
// read only
if (!($fd = @fopen($fpath, 'rb')))
{
trigger_error("XDB::Open(" . basename($fpath) . ",r) failed.", E_USER_WARNING);
return false;
}
}
// check the header
if (!$newdb && !$this->_check_header($fd))
{
trigger_error("XDB::Open(" . basename($fpath) . "), invalid xdb format.", E_USER_WARNING);
fclose($fd);
return false;
}
// set the variable
$this->fd = $fd;
$this->mode = $mode;
$this->Reset();
// lock the file description until close
if ($mode == 'w')
flock($this->fd, LOCK_EX);
return true;
}
// Insert Or Update the value
function Put($key, $value)
{
// check the file description
if (!$this->fd || $this->mode != 'w')
{
trigger_error("XDB::Put(), null db handler or readonly.", E_USER_WARNING);
return false;
}
// check the length
$klen = strlen($key);
$vlen = strlen($value);
if (!$klen || $klen > XDB_MAXKLEN)
return false;
// try to find the old data
$rec = $this->_get_record($key);
if (isset($rec['vlen']) && ($vlen <= $rec['vlen']))
{
// update the old value & length
if ($vlen > 0)
{
fseek($this->fd, $rec['voff'], SEEK_SET);
fwrite($this->fd, $value, $vlen);
}
if ($vlen < $rec['vlen'])
{
$newlen = $rec['len'] + $vlen - $rec['vlen'];
$newbuf = pack('I', $newlen);
fseek($this->fd, $rec['poff'] + 4, SEEK_SET);
fwrite($this->fd, $newbuf, 4);
}
return true;
}
// 构造数据
$new = array('loff' => 0, 'llen' => 0, 'roff' => 0, 'rlen' => 0);
if (isset($rec['vlen']))
{
$new['loff'] = $rec['loff'];
$new['llen'] = $rec['llen'];
$new['roff'] = $rec['roff'];
$new['rlen'] = $rec['rlen'];
}
$buf = pack('IIIIC', $new['loff'], $new['llen'], $new['roff'], $new['rlen'], $klen);
$buf .= $key . $value;
$len = $klen + $vlen + 17;
$off = $this->fsize;
fseek($this->fd, $off, SEEK_SET);
fwrite($this->fd, $buf, $len);
$this->fsize += $len;
$pbuf = pack('II', $off, $len);
fseek($this->fd, $rec['poff'], SEEK_SET);
fwrite($this->fd, $pbuf, 8);
return true;
}
// Read the value by key
function Get($key, $debug = false)
{
// check the file description
if (!$this->fd)
{
trigger_error("XDB::Get(), null db handler.", E_USER_WARNING);
return false;
}
$klen = strlen($key);
if ($klen == 0 || $klen > XDB_MAXKLEN)
return false;
// get the data?
$rec = $this->_get_record($key);
if ($debug)
return $rec;
if (!isset($rec['vlen']) || $rec['vlen'] == 0)
return false;
return $rec['value'];
}
// Read the each key & value
// return array(key => xxx, value => xxx)
function Next()
{
// check the file description
if (!$this->fd)
{
trigger_error("XDB::Next(), null db handler.", E_USER_WARNING);
return false;
}
// Traversal the all tree
if (!($ptr = array_pop($this->trave_stack)))
{
do
{
$this->trave_index++;
if ($this->trave_index >= $this->hash_prime)
break;
$poff = $this->trave_index * 8 + 32;
fseek($this->fd, $poff, SEEK_SET);
$buf = fread($this->fd, 8);
if (strlen($buf) != 8)
{
$ptr = false;
break;
}
$ptr = unpack('Ioff/Ilen', $buf);
}
while ($ptr['len'] == 0);
}
// end the all records?
if (!$ptr || $ptr['len'] == 0)
return false;
// read the record
$rec = $this->_tree_get_record($ptr['off'], $ptr['len']);
// push the left & right
if ($rec['llen'] != 0)
{
$left = array('off' => $rec['loff'], 'len' => $rec['llen']);
array_push($this->trave_stack, $left);
}
if ($rec['rlen'] != 0)
{
$right = array('off' => $rec['roff'], 'len' => $rec['rlen']);
array_push($this->trave_stack, $right);
}
// return value
return $rec;
}
// Traversal every tree... & debug to test
function Draw($i = -1)
{
if ($i < 0 || $i >= $this->hash_prime)
{
$i = 0;
$j = $this->hash_prime;
}
else
{
$j = $i + 1;
}
echo "Draw the XDB data [$i ~ $j]. (" . trim($this->Version()) . ")\n\n";
while ($i < $j)
{
$poff = $i * 8 + 32;
fseek($this->fd, $poff, SEEK_SET);
$buf = fread($this->fd, 8);
if (strlen($buf) != 8) break;
$ptr = unpack('Ioff/Ilen', $buf);
$this->_cur_depth = 0;
$this->_node_num = 0;
$this->_draw_node($ptr['off'], $ptr['len']);
echo "-------------------------------------------\n";
echo "Tree(xdb) [$i] max_depth: {$this->_cur_depth} nodes_num: {$this->_node_num}\n";
$i++;
}
}
// Reset the inner pointer
function Reset()
{
$this->trave_stack = array();
$this->trave_index = -1;
}
// Show the version
function Version()
{
$ver = (is_null($this) ? XDB_VERSION : $this->version);
$str = sprintf("%s/%d.%d", XDB_TAGNAME, ($ver >> 5), ($ver & 0x1f));
if (!is_null($this)) $str .= " <base={$this->hash_base}, prime={$this->hash_prime}>";
return $str;
}
// Close the DB
function Close()
{
if (!$this->fd)
return;
if ($this->mode == 'w')
{
$buf = pack('I', $this->fsize);
fseek($this->fd, 12, SEEK_SET);
fwrite($this->fd, $buf, 4);
flock($this->fd, LOCK_UN);
}
fclose($this->fd);
$this->fd = false;
}
// Optimize the tree
function Optimize($i = -1)
{
// check the file description
if (!$this->fd || $this->mode != 'w')
{
trigger_error("XDB::Optimize(), null db handler or readonly.", E_USER_WARNING);
return false;
}
// get the index zone:
if ($i < 0 || $i >= $this->hash_prime)
{
$i = 0;
$j = $this->hash_prime;
}
else
{
$j = $i + 1;
}
// optimize every index
while ($i < $j)
{
$this->_optimize_index($i);
$i++;
}
}
// optimize a node
function _optimize_index($index)
{
static $cmp = false;
$poff = $index * 8 + 32;
// save all nodes into array()
$this->_sync_nodes = array();
$this->_load_tree_nodes($poff);
$count = count($this->_sync_nodes);
if ($count < 3) return;
// sync the nodes, sort by key first
if ($cmp == false) $cmp = create_function('$a,$b', 'return strcmp($a[key],$b[key]);');
usort($this->_sync_nodes, $cmp);
$this->_reset_tree_nodes($poff, 0, $count - 1);
unset($this->_sync_nodes);
}
// load tree nodes
function _load_tree_nodes($poff)
{
fseek($this->fd, $poff, SEEK_SET);
$buf = fread($this->fd, 8);
if (strlen($buf) != 8) return;
$tmp = unpack('Ioff/Ilen', $buf);
if ($tmp['len'] == 0) return;
fseek($this->fd, $tmp['off'], SEEK_SET);
$rlen = XDB_MAXKLEN + 17;
if ($rlen > $tmp['len']) $rlen = $tmp['len'];
$buf = fread($this->fd, $rlen);
$rec = unpack('Iloff/Illen/Iroff/Irlen/Cklen', substr($buf, 0, 17));
$rec['off'] = $tmp['off'];
$rec['len'] = $tmp['len'];
$rec['key'] = substr($buf, 17, $rec['klen']);
$this->_sync_nodes[] = $rec;
unset($buf);
// left
if ($rec['llen'] != 0) $this->_load_tree_nodes($tmp['off']);
// right
if ($rec['rlen'] != 0) $this->_load_tree_nodes($tmp['off'] + 8);
}
// sync the tree
function _reset_tree_nodes($poff, $low, $high)
{
if ($low <= $high)
{
$mid = ($low+$high)>>1;
$node = $this->_sync_nodes[$mid];
$buf = pack('II', $node['off'], $node['len']);
// left
$this->_reset_tree_nodes($node['off'], $low, $mid - 1);
// right
$this->_reset_tree_nodes($node['off'] + 8, $mid + 1, $high);
}
else
{
$buf = pack('II', 0, 0);
}
fseek($this->fd, $poff, SEEK_SET);
fwrite($this->fd, $buf, 8);
}
// Privated Function
function _get_index($key)
{
$l = strlen($key);
$h = $this->hash_base;
while ($l--)
{
$h += ($h << 5);
$h ^= ord($key[$l]);
$h &= 0x7fffffff;
}
return ($h % $this->hash_prime);
}
// draw the tree nodes by off & len
function _draw_node($off, $len, $rl = 'T', $icon = '', $depth = 0)
{
if ($rl == 'T') echo '(T) ';
else
{
echo $icon;
if ($rl == 'L')
{
$icon .= ' ┃';
echo ' ┟(L) ';
}
else
{
$icon .= ' ';
echo ' └(R) ';
}
}
if ($len == 0)
{
echo "<NULL>\n";
return;
}
$rec = $this->_tree_get_record($off, $len);
echo "{$rec[key]} (vlen={$rec[vlen]}, voff={$rec[voff]})\n";
unset($rec['key'], $rec['value']);
// debug used
$this->_node_num++;
$depth++;
if ($depth >= $this->_cur_depth)
$this->_cur_depth = $depth;
// Left node & Right Node
$this->_draw_node($rec['loff'], $rec['llen'], 'L', $icon, $depth);
$this->_draw_node($rec['roff'], $rec['rlen'], 'R', $icon, $depth);
}
// Check XDB Header
function _check_header($fd)
{
fseek($fd, 0, SEEK_SET);
$buf = fread($fd, 32);
if (strlen($buf) !== 32) return false;
$hdr = unpack('a3tag/Cver/Ibase/Iprime/Ifsize/fcheck/a12reversed', $buf);
if ($hdr['tag'] != XDB_TAGNAME) return false;
// check the fsize
$fstat = fstat($fd);
if ($fstat['size'] != $hdr['fsize'])
return false;
// check float?
$this->hash_base = $hdr['base'];
$this->hash_prime = $hdr['prime'];
$this->version = $hdr['ver'];
$this->fsize = $hdr['fsize'];
return true;
}
// Write XDB Header
function _write_header($fd)
{
$buf = pack('a3CiiIfa12', XDB_TAGNAME, $this->version,
$this->hash_base, $this->hash_prime, 0, XDB_FLOAT_CHECK, '');
fseek($fd, 0, SEEK_SET);
fwrite($fd, $buf, 32);
}
// get the record by first key
function _get_record($key)
{
$this->_io_times = 1;
$index = ($this->hash_prime > 1 ? $this->_get_index($key) : 0);
$poff = $index * 8 + 32;
fseek($this->fd, $poff, SEEK_SET);
$buf = fread($this->fd, 8);
if (strlen($buf) == 8) $tmp = unpack('Ioff/Ilen', $buf);
else $tmp = array('off' => 0, 'len' => 0);
return $this->_tree_get_record($tmp['off'], $tmp['len'], $poff, $key);
}
// get the record by tree
function _tree_get_record($off, $len, $poff = 0, $key = '')
{
if ($len == 0)
return (array('poff' => $poff));
$this->_io_times++;
// get the data & compare the key data
fseek($this->fd, $off, SEEK_SET);
$rlen = XDB_MAXKLEN + 17;
if ($rlen > $len) $rlen = $len;
$buf = fread($this->fd, $rlen);
$rec = unpack('Iloff/Illen/Iroff/Irlen/Cklen', substr($buf, 0, 17));
$fkey = substr($buf, 17, $rec['klen']);
$cmp = ($key ? strcmp($key, $fkey) : 0);
if ($cmp > 0)
{
// --> right
unset($buf);
return $this->_tree_get_record($rec['roff'], $rec['rlen'], $off + 8, $key);
}
else if ($cmp < 0)
{
// <-- left
unset($buf);
return $this->_tree_get_record($rec['loff'], $rec['llen'], $off, $key);
}
else {
// found!!
$rec['poff'] = $poff;
$rec['off'] = $off;
$rec['len'] = $len;
$rec['voff'] = $off + 17 + $rec['klen'];
$rec['vlen'] = $len - 17 - $rec['klen'];
$rec['key'] = $fkey;
fseek($this->fd, $rec['voff'], SEEK_SET);
$rec['value'] = fread($this->fd, $rec['vlen']);
return $rec;
}
}
}
?>