]> code.delx.au - refind/blobdiff - filesystems/fsw_reiserfs.c
Added filesystem drivers.
[refind] / filesystems / fsw_reiserfs.c
diff --git a/filesystems/fsw_reiserfs.c b/filesystems/fsw_reiserfs.c
new file mode 100644 (file)
index 0000000..7c403f6
--- /dev/null
@@ -0,0 +1,874 @@
+/**
+ * \file fsw_reiserfs.c
+ * ReiserFS file system driver code.
+ */
+
+/*-
+ * Copyright (c) 2006 Christoph Pfisterer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include "fsw_reiserfs.h"
+
+
+// functions
+
+static fsw_status_t fsw_reiserfs_volume_mount(struct fsw_reiserfs_volume *vol);
+static void         fsw_reiserfs_volume_free(struct fsw_reiserfs_volume *vol);
+static fsw_status_t fsw_reiserfs_volume_stat(struct fsw_reiserfs_volume *vol, struct fsw_volume_stat *sb);
+
+static fsw_status_t fsw_reiserfs_dnode_fill(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno);
+static void         fsw_reiserfs_dnode_free(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno);
+static fsw_status_t fsw_reiserfs_dnode_stat(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                        struct fsw_dnode_stat *sb);
+static fsw_status_t fsw_reiserfs_get_extent(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                        struct fsw_extent *extent);
+
+static fsw_status_t fsw_reiserfs_dir_lookup(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                        struct fsw_string *lookup_name, struct fsw_reiserfs_dnode **child_dno);
+static fsw_status_t fsw_reiserfs_dir_read(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                      struct fsw_shandle *shand, struct fsw_reiserfs_dnode **child_dno);
+
+static fsw_status_t fsw_reiserfs_readlink(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                      struct fsw_string *link);
+
+static fsw_status_t fsw_reiserfs_item_search(struct fsw_reiserfs_volume *vol,
+                                             fsw_u32 dir_id, fsw_u32 objectid, fsw_u64 offset,
+                                             struct fsw_reiserfs_item *item);
+static fsw_status_t fsw_reiserfs_item_next(struct fsw_reiserfs_volume *vol,
+                                           struct fsw_reiserfs_item *item);
+static void fsw_reiserfs_item_release(struct fsw_reiserfs_volume *vol,
+                                      struct fsw_reiserfs_item *item);
+
+//
+// Dispatch Table
+//
+
+struct fsw_fstype_table   FSW_FSTYPE_TABLE_NAME(reiserfs) = {
+    { FSW_STRING_TYPE_ISO88591, 8, 8, "reiserfs" },
+    sizeof(struct fsw_reiserfs_volume),
+    sizeof(struct fsw_reiserfs_dnode),
+    
+    fsw_reiserfs_volume_mount,
+    fsw_reiserfs_volume_free,
+    fsw_reiserfs_volume_stat,
+    fsw_reiserfs_dnode_fill,
+    fsw_reiserfs_dnode_free,
+    fsw_reiserfs_dnode_stat,
+    fsw_reiserfs_get_extent,
+    fsw_reiserfs_dir_lookup,
+    fsw_reiserfs_dir_read,
+    fsw_reiserfs_readlink,
+};
+
+// misc data
+
+static fsw_u32  superblock_offsets[3] = {
+    REISERFS_DISK_OFFSET_IN_BYTES     >> REISERFS_SUPERBLOCK_BLOCKSIZEBITS,
+    REISERFS_OLD_DISK_OFFSET_IN_BYTES >> REISERFS_SUPERBLOCK_BLOCKSIZEBITS,
+    0
+};
+
+/**
+ * Mount an reiserfs volume. Reads the superblock and constructs the
+ * root directory dnode.
+ */
+
+static fsw_status_t fsw_reiserfs_volume_mount(struct fsw_reiserfs_volume *vol)
+{
+    fsw_status_t    status;
+    void            *buffer;
+    fsw_u32         blocksize;
+    int             i;
+    struct fsw_string s;
+    
+    // allocate memory to keep the superblock around
+    status = fsw_alloc(sizeof(struct reiserfs_super_block), &vol->sb);
+    if (status)
+        return status;
+    
+    // read the superblock into its buffer
+    fsw_set_blocksize(vol, REISERFS_SUPERBLOCK_BLOCKSIZE, REISERFS_SUPERBLOCK_BLOCKSIZE);
+    for (i = 0; superblock_offsets[i]; i++) {
+        status = fsw_block_get(vol, superblock_offsets[i], 0, &buffer);
+        if (status)
+            return status;
+        fsw_memcpy(vol->sb, buffer, sizeof(struct reiserfs_super_block));
+        fsw_block_release(vol, superblock_offsets[i], buffer);
+        
+        // check for one of the magic strings
+        if (fsw_memeq(vol->sb->s_v1.s_magic,
+                      REISERFS_SUPER_MAGIC_STRING, 8)) {
+            vol->version = REISERFS_VERSION_1;
+            break;
+        } else if (fsw_memeq(vol->sb->s_v1.s_magic,
+                             REISER2FS_SUPER_MAGIC_STRING, 9)) {
+            vol->version = REISERFS_VERSION_2;
+            break;
+        } else if (fsw_memeq(vol->sb->s_v1.s_magic,
+                             REISER2FS_JR_SUPER_MAGIC_STRING, 9)) {
+            vol->version = vol->sb->s_v1.s_version;
+            if (vol->version == REISERFS_VERSION_1 || vol->version == REISERFS_VERSION_2)
+                break;
+        }
+    }
+    if (superblock_offsets[i] == 0)
+        return FSW_UNSUPPORTED;
+    
+    // check the superblock
+    if (vol->sb->s_v1.s_root_block == -1)   // unfinished 'reiserfsck --rebuild-tree'
+        return FSW_VOLUME_CORRUPTED;
+    
+    /*
+    if (vol->sb->s_rev_level != EXT2_GOOD_OLD_REV &&
+        vol->sb->s_rev_level != EXT2_DYNAMIC_REV)
+        return FSW_UNSUPPORTED;
+    if (vol->sb->s_rev_level == EXT2_DYNAMIC_REV &&
+        (vol->sb->s_feature_incompat & ~(EXT2_FEATURE_INCOMPAT_FILETYPE | EXT3_FEATURE_INCOMPAT_RECOVER)))
+        return FSW_UNSUPPORTED;
+    */
+    
+    // set real blocksize
+    blocksize = vol->sb->s_v1.s_blocksize;
+    fsw_set_blocksize(vol, blocksize, blocksize);
+    
+    // get other info from superblock
+    /*
+    vol->ind_bcnt = EXT2_ADDR_PER_BLOCK(vol->sb);
+    vol->dind_bcnt = vol->ind_bcnt * vol->ind_bcnt;
+    vol->inode_size = EXT2_INODE_SIZE(vol->sb);
+    */
+    
+    for (i = 0; i < 16; i++)
+        if (vol->sb->s_label[i] == 0)
+            break;
+    s.type = FSW_STRING_TYPE_ISO88591;
+    s.size = s.len = i;
+    s.data = vol->sb->s_label;
+    status = fsw_strdup_coerce(&vol->g.label, vol->g.host_string_type, &s);
+    if (status)
+        return status;
+    
+    // setup the root dnode
+    status = fsw_dnode_create_root(vol, REISERFS_ROOT_OBJECTID, &vol->g.root);
+    if (status)
+        return status;
+    vol->g.root->dir_id = REISERFS_ROOT_PARENT_OBJECTID;
+    
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_volume_mount: success, blocksize %d tree height %d\n"),
+                   blocksize, vol->sb->s_v1.s_tree_height));
+    
+    return FSW_SUCCESS;
+}
+
+/**
+ * Free the volume data structure. Called by the core after an unmount or after
+ * an unsuccessful mount to release the memory used by the file system type specific
+ * part of the volume structure.
+ */
+
+static void fsw_reiserfs_volume_free(struct fsw_reiserfs_volume *vol)
+{
+    if (vol->sb)
+        fsw_free(vol->sb);
+}
+
+/**
+ * Get in-depth information on a volume.
+ */
+
+static fsw_status_t fsw_reiserfs_volume_stat(struct fsw_reiserfs_volume *vol, struct fsw_volume_stat *sb)
+{
+    sb->total_bytes = (fsw_u64)vol->sb->s_v1.s_block_count * vol->g.log_blocksize;
+    sb->free_bytes  = (fsw_u64)vol->sb->s_v1.s_free_blocks * vol->g.log_blocksize;
+    return FSW_SUCCESS;
+}
+
+/**
+ * Get full information on a dnode from disk. This function is called by the core
+ * whenever it needs to access fields in the dnode structure that may not
+ * be filled immediately upon creation of the dnode. In the case of reiserfs, we
+ * delay fetching of the stat data until dnode_fill is called. The size and
+ * type fields are invalid until this function has been called.
+ */
+
+static fsw_status_t fsw_reiserfs_dnode_fill(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno)
+{
+    fsw_status_t    status;
+    fsw_u32         item_len, mode;
+    struct fsw_reiserfs_item item;
+    
+    if (dno->sd_v1 || dno->sd_v2)
+        return FSW_SUCCESS;
+    
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_dnode_fill: object %d/%d\n"), dno->dir_id, dno->g.dnode_id));
+    
+    // find stat data item in reiserfs tree
+    status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, 0, &item);
+    if (status == FSW_NOT_FOUND) {
+        FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_dnode_fill: cannot find stat_data for object %d/%d\n"),
+                        dno->dir_id, dno->g.dnode_id));
+        return FSW_VOLUME_CORRUPTED;
+    }
+    if (status)
+        return status;
+    if (item.item_offset != 0) {
+        FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_dnode_fill: got item that's not stat_data\n")));
+        fsw_reiserfs_item_release(vol, &item);
+        return FSW_VOLUME_CORRUPTED;
+    }
+    item_len = item.ih.ih_item_len;
+    
+    // get data in appropriate version
+    if (item.ih.ih_version == KEY_FORMAT_3_5 && item_len == SD_V1_SIZE) {
+        // have stat_data_v1 structure
+        status = fsw_memdup((void **)&dno->sd_v1, item.item_data, item_len);
+        fsw_reiserfs_item_release(vol, &item);
+        if (status)
+            return status;
+        
+        // get info from the inode
+        dno->g.size = dno->sd_v1->sd_size;
+        mode = dno->sd_v1->sd_mode;
+        
+    } else if (item.ih.ih_version == KEY_FORMAT_3_6 && item_len == SD_V2_SIZE) {
+        // have stat_data_v2 structure
+        status = fsw_memdup((void **)&dno->sd_v2, item.item_data, item_len);
+        fsw_reiserfs_item_release(vol, &item);
+        if (status)
+            return status;
+        
+        // get info from the inode
+        dno->g.size = dno->sd_v2->sd_size;
+        mode = dno->sd_v2->sd_mode;
+        
+    } else {
+        FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_dnode_fill: version %d(%d) and size %d(%d) not recognized for stat_data\n"),
+                        item.ih.ih_version, KEY_FORMAT_3_6, item_len, SD_V2_SIZE));
+        fsw_reiserfs_item_release(vol, &item);
+        return FSW_VOLUME_CORRUPTED;
+    }
+    
+    // get node type from mode field
+    if (S_ISREG(mode))
+        dno->g.type = FSW_DNODE_TYPE_FILE;
+    else if (S_ISDIR(mode))
+        dno->g.type = FSW_DNODE_TYPE_DIR;
+    else if (S_ISLNK(mode))
+        dno->g.type = FSW_DNODE_TYPE_SYMLINK;
+    else
+        dno->g.type = FSW_DNODE_TYPE_SPECIAL;
+    
+    return FSW_SUCCESS;
+}
+
+/**
+ * Free the dnode data structure. Called by the core when deallocating a dnode
+ * structure to release the memory used by the file system type specific part
+ * of the dnode structure.
+ */
+
+static void fsw_reiserfs_dnode_free(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno)
+{
+    if (dno->sd_v1)
+        fsw_free(dno->sd_v1);
+    if (dno->sd_v2)
+        fsw_free(dno->sd_v2);
+}
+
+/**
+ * Get in-depth information on a dnode. The core makes sure that fsw_reiserfs_dnode_fill
+ * has been called on the dnode before this function is called. Note that some
+ * data is not directly stored into the structure, but passed to a host-specific
+ * callback that converts it to the host-specific format.
+ */
+
+static fsw_status_t fsw_reiserfs_dnode_stat(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                            struct fsw_dnode_stat *sb)
+{
+    if (dno->sd_v1) {
+        if (dno->g.type == FSW_DNODE_TYPE_SPECIAL)
+            sb->used_bytes = 0;
+        else
+            sb->used_bytes = dno->sd_v1->u.sd_blocks * vol->g.log_blocksize;
+        sb->store_time_posix(sb, FSW_DNODE_STAT_CTIME, dno->sd_v1->sd_ctime);
+        sb->store_time_posix(sb, FSW_DNODE_STAT_ATIME, dno->sd_v1->sd_atime);
+        sb->store_time_posix(sb, FSW_DNODE_STAT_MTIME, dno->sd_v1->sd_mtime);
+        sb->store_attr_posix(sb, dno->sd_v1->sd_mode);
+    } else if (dno->sd_v2) {
+        sb->used_bytes = dno->sd_v2->sd_blocks * vol->g.log_blocksize;
+        sb->store_time_posix(sb, FSW_DNODE_STAT_CTIME, dno->sd_v2->sd_ctime);
+        sb->store_time_posix(sb, FSW_DNODE_STAT_ATIME, dno->sd_v2->sd_atime);
+        sb->store_time_posix(sb, FSW_DNODE_STAT_MTIME, dno->sd_v2->sd_mtime);
+        sb->store_attr_posix(sb, dno->sd_v2->sd_mode);
+    }
+    
+    return FSW_SUCCESS;
+}
+
+/**
+ * Retrieve file data mapping information. This function is called by the core when
+ * fsw_shandle_read needs to know where on the disk the required piece of the file's
+ * data can be found. The core makes sure that fsw_reiserfs_dnode_fill has been called
+ * on the dnode before. Our task here is to get the physical disk block number for
+ * the requested logical block number.
+ */
+
+static fsw_status_t fsw_reiserfs_get_extent(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                            struct fsw_extent *extent)
+{
+    fsw_status_t    status;
+    fsw_u64         search_offset, intra_offset;
+    struct fsw_reiserfs_item item;
+    fsw_u32         intra_bno, nr_item;
+    
+    // Preconditions: The caller has checked that the requested logical block
+    //  is within the file's size. The dnode has complete information, i.e.
+    //  fsw_reiserfs_dnode_read_info was called successfully on it.
+    
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_get_extent: mapping block %d of object %d/%d\n"),
+                   extent->log_start, dno->dir_id, dno->g.dnode_id));
+    
+    extent->type = FSW_EXTENT_TYPE_SPARSE;
+    extent->log_count = 1;
+    
+    // get the item for the requested block
+    search_offset = (fsw_u64)extent->log_start * vol->g.log_blocksize + 1;
+    status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, search_offset, &item);
+    if (status)
+        return status;
+    if (item.item_offset == 0) {
+        fsw_reiserfs_item_release(vol, &item);
+        return FSW_SUCCESS;       // no data items found, assume all-sparse file
+    }
+    intra_offset = search_offset - item.item_offset;
+    
+    // check the kind of block
+    if (item.item_type == TYPE_INDIRECT || item.item_type == V1_INDIRECT_UNIQUENESS) {
+        // indirect item, contains block numbers
+        
+        if (intra_offset & (vol->g.log_blocksize - 1)) {
+            FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_get_extent: intra_offset not block-aligned for indirect block\n")));
+            goto bail;
+        }
+        intra_bno = (fsw_u32)FSW_U64_DIV(intra_offset, vol->g.log_blocksize);
+        nr_item = item.ih.ih_item_len / sizeof(fsw_u32);
+        if (intra_bno >= nr_item) {
+            FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_get_extent: indirect block too small\n")));
+            goto bail;
+        }
+        extent->type = FSW_EXTENT_TYPE_PHYSBLOCK;
+        extent->phys_start = ((fsw_u32 *)item.item_data)[intra_bno];
+        
+        // TODO: check if the following blocks can be aggregated into one extent
+        
+        fsw_reiserfs_item_release(vol, &item);
+        return FSW_SUCCESS;
+        
+    } else if (item.item_type == TYPE_DIRECT || item.item_type == V1_DIRECT_UNIQUENESS) {
+        // direct item, contains file data
+        
+        // TODO: Check if direct items always start on block boundaries. If not, we may have
+        //  to do extra work here.
+        
+        if (intra_offset != 0) {
+            FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_get_extent: intra_offset not aligned for direct block\n")));
+            goto bail;
+        }
+        
+        extent->type = FSW_EXTENT_TYPE_BUFFER;
+        status = fsw_memdup(&extent->buffer, item.item_data, item.ih.ih_item_len);
+        fsw_reiserfs_item_release(vol, &item);
+        if (status)
+            return status;
+        
+        return FSW_SUCCESS;
+        
+    }
+    
+bail:
+    fsw_reiserfs_item_release(vol, &item);
+    return FSW_VOLUME_CORRUPTED;
+    
+    /*    
+    // check if the following blocks can be aggregated into one extent
+    file_bcnt = (fsw_u32)((dno->g.size + vol->g.log_blocksize - 1) & (vol->g.log_blocksize - 1));
+    while (path[i]           + extent->log_count < buf_bcnt &&    // indirect block has more block pointers
+           extent->log_start + extent->log_count < file_bcnt) {   // file has more blocks
+        if (buffer[path[i] + extent->log_count] == buffer[path[i] + extent->log_count - 1] + 1)
+            extent->log_count++;
+        else
+            break;
+    }
+    */
+}
+
+/**
+ * Lookup a directory's child dnode by name. This function is called on a directory
+ * to retrieve the directory entry with the given name. A dnode is constructed for
+ * this entry and returned. The core makes sure that fsw_reiserfs_dnode_fill has been called
+ * and the dnode is actually a directory.
+ */
+
+static fsw_status_t fsw_reiserfs_dir_lookup(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                            struct fsw_string *lookup_name, struct fsw_reiserfs_dnode **child_dno_out)
+{
+    fsw_status_t    status;
+    struct fsw_reiserfs_item item;
+    fsw_u32         nr_item, i, name_offset, next_name_offset, name_len;
+    fsw_u32         child_dir_id;
+    struct reiserfs_de_head *dhead;
+    struct fsw_string entry_name;
+    
+    // Preconditions: The caller has checked that dno is a directory node.
+    
+    // BIG TODOS: Use the hash function to start with the item containing the entry.
+    //  Use binary search within the item.
+    
+    entry_name.type = FSW_STRING_TYPE_ISO88591;
+    
+    // get the item for that position
+    status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, FIRST_ITEM_OFFSET, &item);
+    if (status)
+        return status;
+    if (item.item_offset == 0) {
+        fsw_reiserfs_item_release(vol, &item);
+        return FSW_NOT_FOUND;       // empty directory or something
+    }
+    
+    for(;;) {
+        
+        // search the directory item
+        dhead = (struct reiserfs_de_head *)item.item_data;
+        nr_item = item.ih.u.ih_entry_count;
+        next_name_offset = item.ih.ih_item_len;
+        for (i = 0; i < nr_item; i++, dhead++, next_name_offset = name_offset) {
+            // get the name
+            name_offset = dhead->deh_location;
+            name_len = next_name_offset - name_offset;
+            while (name_len > 0 && item.item_data[name_offset + name_len - 1] == 0)
+                name_len--;
+            
+            entry_name.len = entry_name.size = name_len;
+            entry_name.data = item.item_data + name_offset;
+            
+            // compare name
+            if (fsw_streq(lookup_name, &entry_name)) {
+                // found the entry we're looking for!
+                
+                // setup a dnode for the child item
+                status = fsw_dnode_create(dno, dhead->deh_objectid, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
+                child_dir_id = dhead->deh_dir_id;
+                fsw_reiserfs_item_release(vol, &item);
+                if (status)
+                    return status;
+                (*child_dno_out)->dir_id = child_dir_id;
+                
+                return FSW_SUCCESS;
+            }
+        }
+        
+        // We didn't find the next directory entry in this item. Look for the next
+        // item of the directory.
+        
+        status = fsw_reiserfs_item_next(vol, &item);
+        if (status)
+            return status;
+        
+    }
+}
+
+/**
+ * Get the next directory entry when reading a directory. This function is called during
+ * directory iteration to retrieve the next directory entry. A dnode is constructed for
+ * the entry and returned. The core makes sure that fsw_reiserfs_dnode_fill has been called
+ * and the dnode is actually a directory. The shandle provided by the caller is used to
+ * record the position in the directory between calls.
+ */
+
+static fsw_status_t fsw_reiserfs_dir_read(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                          struct fsw_shandle *shand, struct fsw_reiserfs_dnode **child_dno_out)
+{
+    fsw_status_t    status;
+    struct fsw_reiserfs_item item;
+    fsw_u32         nr_item, i, name_offset, next_name_offset, name_len;
+    fsw_u32         child_dir_id;
+    struct reiserfs_de_head *dhead;
+    struct fsw_string entry_name;
+    
+    // Preconditions: The caller has checked that dno is a directory node. The caller
+    //  has opened a storage handle to the directory's storage and keeps it around between
+    //  calls.
+    
+    // BIG TODOS: Use binary search within the item.
+    
+    // adjust pointer to first entry if necessary
+    if (shand->pos == 0)
+        shand->pos = FIRST_ITEM_OFFSET;
+    
+    // get the item for that position
+    status = fsw_reiserfs_item_search(vol, dno->dir_id, dno->g.dnode_id, shand->pos, &item);
+    if (status)
+        return status;
+    if (item.item_offset == 0) {
+        fsw_reiserfs_item_release(vol, &item);
+        return FSW_NOT_FOUND;       // empty directory or something
+    }
+    
+    for(;;) {
+        
+        // search the directory item
+        dhead = (struct reiserfs_de_head *)item.item_data;
+        nr_item = item.ih.u.ih_entry_count;
+        for (i = 0; i < nr_item; i++, dhead++) {
+            if (dhead->deh_offset < shand->pos)
+                continue;  // not yet past the last entry returned
+            if (dhead->deh_offset == DOT_OFFSET || dhead->deh_offset == DOT_DOT_OFFSET)
+                continue;  // never report . or ..
+            
+            // get the name
+            name_offset = dhead->deh_location;
+            if (i == 0)
+                next_name_offset = item.ih.ih_item_len;
+            else
+                next_name_offset = dhead[-1].deh_location;
+            name_len = next_name_offset - name_offset;
+            while (name_len > 0 && item.item_data[name_offset + name_len - 1] == 0)
+                name_len--;
+            
+            entry_name.type = FSW_STRING_TYPE_ISO88591;
+            entry_name.len = entry_name.size = name_len;
+            entry_name.data = item.item_data + name_offset;
+            
+            if (fsw_streq_cstr(&entry_name, ".reiserfs_priv"))
+                continue;  // never report this special file
+            
+            // found the next entry!
+            shand->pos = dhead->deh_offset + 1;
+            
+            // setup a dnode for the child item
+            status = fsw_dnode_create(dno, dhead->deh_objectid, FSW_DNODE_TYPE_UNKNOWN, &entry_name, child_dno_out);
+            child_dir_id = dhead->deh_dir_id;
+            fsw_reiserfs_item_release(vol, &item);
+            if (status)
+                return status;
+            (*child_dno_out)->dir_id = child_dir_id;
+            
+            return FSW_SUCCESS;
+        }
+        
+        // We didn't find the next directory entry in this item. Look for the next
+        // item of the directory.
+        
+        status = fsw_reiserfs_item_next(vol, &item);
+        if (status)
+            return status;
+        
+    }
+}
+
+/**
+ * Get the target path of a symbolic link. This function is called when a symbolic
+ * link needs to be resolved. The core makes sure that the fsw_reiserfs_dnode_fill has been
+ * called on the dnode and that it really is a symlink.
+ */
+
+static fsw_status_t fsw_reiserfs_readlink(struct fsw_reiserfs_volume *vol, struct fsw_reiserfs_dnode *dno,
+                                          struct fsw_string *link_target)
+{
+    return fsw_dnode_readlink_data(dno, link_target);
+}
+
+/**
+ * Compare an on-disk tree key against the search key.
+ */
+
+static int fsw_reiserfs_compare_key(struct reiserfs_key *key,
+                                    fsw_u32 dir_id, fsw_u32 objectid, fsw_u64 offset)
+{
+    fsw_u32 key_type;
+    fsw_u64 key_offset;
+    
+    if (key->k_dir_id > dir_id)
+        return FIRST_GREATER;
+    if (key->k_dir_id < dir_id)
+        return SECOND_GREATER;
+    
+    if (key->k_objectid > objectid)
+        return FIRST_GREATER;
+    if (key->k_objectid < objectid)
+        return SECOND_GREATER;
+    
+    // determine format of the on-disk key
+    key_type = (fsw_u32)FSW_U64_SHR(key->u.k_offset_v2.v, 60);
+    if (key_type != TYPE_DIRECT && key_type != TYPE_INDIRECT && key_type != TYPE_DIRENTRY) {
+        // detected 3.5 format (_v1)
+        key_offset = key->u.k_offset_v1.k_offset;
+    } else {
+        // detected 3.6 format (_v2)
+        key_offset = key->u.k_offset_v2.v & (~0ULL >> 4);
+    }
+    if (key_offset > offset)
+        return FIRST_GREATER;
+    if (key_offset < offset)
+        return SECOND_GREATER;
+    return KEYS_IDENTICAL;
+}
+
+/**
+ * Find an item by key in the reiserfs tree.
+ */
+
+static fsw_status_t fsw_reiserfs_item_search(struct fsw_reiserfs_volume *vol,
+                                            fsw_u32 dir_id, fsw_u32 objectid, fsw_u64 offset,
+                                            struct fsw_reiserfs_item *item)
+{
+    fsw_status_t    status;
+    int             comp_result;
+    fsw_u32         tree_bno, next_tree_bno, tree_level, nr_item, i;
+    fsw_u8          *buffer;
+    struct block_head *bhead;
+    struct reiserfs_key *key;
+    struct item_head *ihead;
+    
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_search: searching %d/%d/%lld\n"), dir_id, objectid, offset));
+    
+    // BIG TODOS: Use binary search within the item.
+    //  Remember tree path for "get next item" function.
+    
+    item->valid = 0;
+    item->block_bno = 0;
+    
+    // walk the tree
+    tree_bno = vol->sb->s_v1.s_root_block;
+    for (tree_level = vol->sb->s_v1.s_tree_height - 1; ; tree_level--) {
+        
+        // get the current tree block into memory
+        status = fsw_block_get(vol, tree_bno, tree_level, (void **)&buffer);
+        if (status)
+            return status;
+        bhead = (struct block_head *)buffer;
+        if (bhead->blk_level != tree_level) {
+            FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_item_search: tree block %d has not expected level %d\n"), tree_bno, tree_level));
+            fsw_block_release(vol, tree_bno, buffer);
+            return FSW_VOLUME_CORRUPTED;
+        }
+        nr_item = bhead->blk_nr_item;
+        FSW_MSG_DEBUGV((FSW_MSGSTR("fsw_reiserfs_item_search: visiting block %d level %d items %d\n"), tree_bno, tree_level, nr_item));
+        item->path_bno[tree_level] = tree_bno;
+        
+        // check if we have reached a leaf block
+        if (tree_level == DISK_LEAF_NODE_LEVEL)
+            break;
+        
+        // search internal node block, look for the path to follow
+        key = (struct reiserfs_key *)(buffer + BLKH_SIZE);
+        for (i = 0; i < nr_item; i++, key++) {
+            if (fsw_reiserfs_compare_key(key, dir_id, objectid, offset) == FIRST_GREATER)
+                break;
+        }
+        item->path_index[tree_level] = i;
+        next_tree_bno = ((struct disk_child *)(buffer + BLKH_SIZE + nr_item * KEY_SIZE))[i].dc_block_number;
+        fsw_block_release(vol, tree_bno, buffer);
+        tree_bno = next_tree_bno;
+    }
+    
+    // search leaf node block, look for our data
+    ihead = (struct item_head *)(buffer + BLKH_SIZE);
+    for (i = 0; i < nr_item; i++, ihead++) {
+        comp_result = fsw_reiserfs_compare_key(&ihead->ih_key, dir_id, objectid, offset);
+        if (comp_result == KEYS_IDENTICAL)
+            break;
+        if (comp_result == FIRST_GREATER) {
+            // Current key is greater than the search key. Use the last key before this
+            // one as the preliminary result.
+            if (i == 0) {
+                fsw_block_release(vol, tree_bno, buffer);
+                return FSW_NOT_FOUND;
+            }
+            i--, ihead--;
+            break;
+        }
+    }
+    if (i >= nr_item) {
+        // Go back to the last key, it was smaller than the search key.
+        // NOTE: The first key of the next leaf block is guaranteed to be greater than
+        //  our search key.
+        i--, ihead--;
+    }
+    item->path_index[tree_level] = i;
+    // Since we may have a key that is smaller than the search key, verify that
+    // it is for the same object.
+    if (ihead->ih_key.k_dir_id != dir_id || ihead->ih_key.k_objectid != objectid) {
+        fsw_block_release(vol, tree_bno, buffer);
+        return FSW_NOT_FOUND;   // Found no key for this object at all
+    }
+    
+    // return results
+    fsw_memcpy(&item->ih, ihead, sizeof(struct item_head));
+    item->item_type = (fsw_u32)FSW_U64_SHR(ihead->ih_key.u.k_offset_v2.v, 60);
+    if (item->item_type != TYPE_DIRECT &&
+        item->item_type != TYPE_INDIRECT &&
+        item->item_type != TYPE_DIRENTRY) {
+        // 3.5 format (_v1)
+        item->item_type = ihead->ih_key.u.k_offset_v1.k_uniqueness;
+        item->item_offset = ihead->ih_key.u.k_offset_v1.k_offset;
+    } else {
+        // 3.6 format (_v2)
+        item->item_offset = ihead->ih_key.u.k_offset_v2.v & (~0ULL >> 4);
+    }
+    item->item_data = buffer + ihead->ih_item_location;
+    item->valid = 1;
+    
+    // add information for block release
+    item->block_bno = tree_bno;
+    item->block_buffer = buffer;
+    
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_search: found %d/%d/%lld (%d)\n"),
+                   ihead->ih_key.k_dir_id, ihead->ih_key.k_objectid, item->item_offset, item->item_type));
+    return FSW_SUCCESS;
+}
+
+/**
+ * Find the next item in the reiserfs tree for an already-found item.
+ */
+
+static fsw_status_t fsw_reiserfs_item_next(struct fsw_reiserfs_volume *vol,
+                                          struct fsw_reiserfs_item *item)
+{
+    fsw_status_t    status;
+    fsw_u32         dir_id, objectid;
+    fsw_u64         offset;
+    fsw_u32         tree_bno, next_tree_bno, tree_level, nr_item, nr_ptr_item;
+    fsw_u8          *buffer;
+    struct block_head *bhead;
+    struct item_head *ihead;
+    
+    if (!item->valid)
+        return FSW_NOT_FOUND;
+    fsw_reiserfs_item_release(vol, item);   // TODO: maybe delay this and/or use the cached block!
+    
+    dir_id = item->ih.ih_key.k_dir_id;
+    objectid = item->ih.ih_key.k_objectid;
+    offset = item->item_offset;
+    
+    FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_next: next for %d/%d/%lld\n"), dir_id, objectid, offset));
+    
+    // find a node that has more items, moving up until we find one
+    
+    for (tree_level = DISK_LEAF_NODE_LEVEL; tree_level < vol->sb->s_v1.s_tree_height; tree_level++) {
+        
+        // get the current tree block into memory
+        tree_bno = item->path_bno[tree_level];
+        status = fsw_block_get(vol, tree_bno, tree_level, (void **)&buffer);
+        if (status)
+            return status;
+        bhead = (struct block_head *)buffer;
+        if (bhead->blk_level != tree_level) {
+            FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_item_next: tree block %d has not expected level %d\n"), tree_bno, tree_level));
+            fsw_block_release(vol, tree_bno, buffer);
+            return FSW_VOLUME_CORRUPTED;
+        }
+        nr_item = bhead->blk_nr_item;
+        FSW_MSG_DEBUGV((FSW_MSGSTR("fsw_reiserfs_item_next: visiting block %d level %d items %d\n"), tree_bno, tree_level, nr_item));
+        
+        nr_ptr_item = nr_item + ((tree_level > DISK_LEAF_NODE_LEVEL) ? 1 : 0);  // internal nodes have (nr_item) keys and (nr_item+1) pointers
+        item->path_index[tree_level]++;
+        if (item->path_index[tree_level] >= nr_ptr_item) {
+            item->path_index[tree_level] = 0;
+            fsw_block_release(vol, tree_bno, buffer);
+            continue;  // this node doesn't have any more items, move up one level
+        }
+        
+        // we have a new path to follow, move down to the leaf node again
+        while (tree_level > DISK_LEAF_NODE_LEVEL) {
+            // get next pointer from current block
+            next_tree_bno = ((struct disk_child *)(buffer + BLKH_SIZE + nr_item * KEY_SIZE))[item->path_index[tree_level]].dc_block_number;
+            fsw_block_release(vol, tree_bno, buffer);
+            tree_bno = next_tree_bno;
+            tree_level--;
+            
+            // get the current tree block into memory
+            status = fsw_block_get(vol, tree_bno, tree_level, (void **)&buffer);
+            if (status)
+                return status;
+            bhead = (struct block_head *)buffer;
+            if (bhead->blk_level != tree_level) {
+                FSW_MSG_ASSERT((FSW_MSGSTR("fsw_reiserfs_item_next: tree block %d has not expected level %d\n"), tree_bno, tree_level));
+                fsw_block_release(vol, tree_bno, buffer);
+                return FSW_VOLUME_CORRUPTED;
+            }
+            nr_item = bhead->blk_nr_item;
+            FSW_MSG_DEBUGV((FSW_MSGSTR("fsw_reiserfs_item_next: visiting block %d level %d items %d\n"), tree_bno, tree_level, nr_item));
+            item->path_bno[tree_level] = tree_bno;
+        }
+        
+        // get the item from the leaf node
+        ihead = ((struct item_head *)(buffer + BLKH_SIZE)) + item->path_index[tree_level];
+        
+        // We now have the item that follows the previous one in the tree. Check that it
+        // belongs to the same object.
+        if (ihead->ih_key.k_dir_id != dir_id || ihead->ih_key.k_objectid != objectid) {
+            fsw_block_release(vol, tree_bno, buffer);
+            return FSW_NOT_FOUND;   // Found no next key for this object
+        }
+        
+        // return results
+        fsw_memcpy(&item->ih, ihead, sizeof(struct item_head));
+        item->item_type = (fsw_u32)FSW_U64_SHR(ihead->ih_key.u.k_offset_v2.v, 60);
+        if (item->item_type != TYPE_DIRECT &&
+            item->item_type != TYPE_INDIRECT &&
+            item->item_type != TYPE_DIRENTRY) {
+            // 3.5 format (_v1)
+            item->item_type = ihead->ih_key.u.k_offset_v1.k_uniqueness;
+            item->item_offset = ihead->ih_key.u.k_offset_v1.k_offset;
+        } else {
+            // 3.6 format (_v2)
+            item->item_offset = ihead->ih_key.u.k_offset_v2.v & (~0ULL >> 4);
+        }
+        item->item_data = buffer + ihead->ih_item_location;
+        item->valid = 1;
+        
+        // add information for block release
+        item->block_bno = tree_bno;
+        item->block_buffer = buffer;
+        
+        FSW_MSG_DEBUG((FSW_MSGSTR("fsw_reiserfs_item_next: found %d/%d/%lld (%d)\n"),
+                       ihead->ih_key.k_dir_id, ihead->ih_key.k_objectid, item->item_offset, item->item_type));
+        return FSW_SUCCESS;
+    }
+    
+    // we went to the highest level node and there still were no more items...
+    return FSW_NOT_FOUND;
+}
+
+/**
+ * Release the disk block still referenced by an item search result.
+ */
+
+static void fsw_reiserfs_item_release(struct fsw_reiserfs_volume *vol,
+                                      struct fsw_reiserfs_item *item)
+{
+    if (!item->valid)
+        return;
+    
+    if (item->block_bno > 0) {
+        fsw_block_release(vol, item->block_bno, item->block_buffer);
+        item->block_bno = 0;
+    }
+}
+
+// EOF