/**
 * ext4 file system driver code.
 *
 * Copyright (c) 2012 Stefan Agner
 * Portions Copyright (c) 2006 Christoph Pfisterer
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
30 static fsw_status_t
fsw_ext4_volume_mount(struct fsw_ext4_volume
*vol
);
31 static void fsw_ext4_volume_free(struct fsw_ext4_volume
*vol
);
32 static fsw_status_t
fsw_ext4_volume_stat(struct fsw_ext4_volume
*vol
, struct fsw_volume_stat
*sb
);
34 static fsw_status_t
fsw_ext4_dnode_fill(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
);
35 static void fsw_ext4_dnode_free(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
);
36 static fsw_status_t
fsw_ext4_dnode_stat(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
37 struct fsw_dnode_stat
*sb
);
38 static fsw_status_t
fsw_ext4_get_extent(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
39 struct fsw_extent
*extent
);
41 static fsw_status_t
fsw_ext4_dir_lookup(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
42 struct fsw_string
*lookup_name
, struct fsw_ext4_dnode
**child_dno
);
43 static fsw_status_t
fsw_ext4_dir_read(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
44 struct fsw_shandle
*shand
, struct fsw_ext4_dnode
**child_dno
);
45 static fsw_status_t
fsw_ext4_read_dentry(struct fsw_shandle
*shand
, struct ext4_dir_entry
*entry
);
47 static fsw_status_t
fsw_ext4_readlink(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
48 struct fsw_string
*link
);
/*
 * File system type table for the ext4 driver. The visible initializer
 * supplies a string entry naming the type ("ext4"), the sizes of the
 * ext4-specific volume and dnode structures, and then driver entry points,
 * beginning with fsw_ext4_volume_mount.
 *
 * NOTE(review): the remaining initializer entries and the closing brace are
 * not visible in this view; their order is dictated by struct
 * fsw_fstype_table, which is declared elsewhere -- confirm against it.
 */
54 struct fsw_fstype_table
FSW_FSTYPE_TABLE_NAME(ext4
) = {
55 { FSW_STRING_TYPE_ISO88591
, 4, 4, "ext4" },
56 sizeof(struct fsw_ext4_volume
),
57 sizeof(struct fsw_ext4_dnode
),
59 fsw_ext4_volume_mount
,
72 * Mount an ext4 volume. Reads the superblock and constructs the
73 * root directory dnode.
76 static fsw_status_t
fsw_ext4_volume_mount(struct fsw_ext4_volume
*vol
)
81 fsw_u32 groupcnt
, groupno
, gdesc_per_block
, gdesc_bno
, gdesc_index
;
82 struct ext4_group_desc
*gdesc
;
86 // allocate memory to keep the superblock around
87 status
= fsw_alloc(sizeof(struct ext4_super_block
), &vol
->sb
);
91 // read the superblock into its buffer
92 fsw_set_blocksize(vol
, EXT4_SUPERBLOCK_BLOCKSIZE
, EXT4_SUPERBLOCK_BLOCKSIZE
);
93 status
= fsw_block_get(vol
, EXT4_SUPERBLOCK_BLOCKNO
, 0, &buffer
);
96 fsw_memcpy(vol
->sb
, buffer
, sizeof(struct ext4_super_block
));
97 fsw_block_release(vol
, EXT4_SUPERBLOCK_BLOCKNO
, buffer
);
99 // check the superblock
100 if (vol
->sb
->s_magic
!= EXT4_SUPER_MAGIC
)
101 return FSW_UNSUPPORTED
;
102 if (vol
->sb
->s_rev_level
!= EXT4_GOOD_OLD_REV
&&
103 vol
->sb
->s_rev_level
!= EXT4_DYNAMIC_REV
)
104 return FSW_UNSUPPORTED
;
106 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: Incompat flag %x\n"), vol
->sb
->s_feature_incompat
));
108 if (vol
->sb
->s_rev_level
== EXT4_DYNAMIC_REV
&&
109 (vol
->sb
->s_feature_incompat
& ~(EXT4_FEATURE_INCOMPAT_FILETYPE
| EXT4_FEATURE_INCOMPAT_RECOVER
)))
110 return FSW_UNSUPPORTED
;
113 if (vol
->sb
->s_rev_level
== EXT4_DYNAMIC_REV
&&
114 (vol
->sb
->s_feature_incompat
& EXT4_FEATURE_INCOMPAT_RECOVER
))
116 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: This ext3 file system needs recovery\n")));
117 // Print(L"Ext4 WARNING: This file system needs recovery, trying to use it anyway.\n");
120 // set real blocksize
121 blocksize
= EXT4_BLOCK_SIZE(vol
->sb
);
122 fsw_set_blocksize(vol
, blocksize
, blocksize
);
124 // get other info from superblock
125 vol
->ind_bcnt
= EXT4_ADDR_PER_BLOCK(vol
->sb
);
126 vol
->dind_bcnt
= vol
->ind_bcnt
* vol
->ind_bcnt
;
127 vol
->inode_size
= EXT4_INODE_SIZE(vol
->sb
);
129 for (i
= 0; i
< 16; i
++)
130 if (vol
->sb
->s_volume_name
[i
] == 0)
132 s
.type
= FSW_STRING_TYPE_ISO88591
;
134 s
.data
= vol
->sb
->s_volume_name
;
135 status
= fsw_strdup_coerce(&vol
->g
.label
, vol
->g
.host_string_type
, &s
);
139 // read the group descriptors to get inode table offsets
140 groupcnt
= ((vol
->sb
->s_inodes_count
- 2) / vol
->sb
->s_inodes_per_group
) + 1;
141 gdesc_per_block
= (vol
->g
.phys_blocksize
/ sizeof(struct ext4_group_desc
));
143 status
= fsw_alloc(sizeof(fsw_u32
) * groupcnt
, &vol
->inotab_bno
);
146 for (groupno
= 0; groupno
< groupcnt
; groupno
++) {
147 // get the block group descriptor
148 gdesc_bno
= (vol
->sb
->s_first_data_block
+ 1) + groupno
/ gdesc_per_block
;
149 gdesc_index
= groupno
% gdesc_per_block
;
150 status
= fsw_block_get(vol
, gdesc_bno
, 1, (void **)&buffer
);
153 gdesc
= ((struct ext4_group_desc
*)(buffer
)) + gdesc_index
;
154 vol
->inotab_bno
[groupno
] = gdesc
->bg_inode_table_lo
;
155 fsw_block_release(vol
, gdesc_bno
, buffer
);
158 // setup the root dnode
159 status
= fsw_dnode_create_root(vol
, EXT4_ROOT_INO
, &vol
->g
.root
);
163 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_volume_mount: success, blocksize %d\n"), blocksize
));
169 * Free the volume data structure. Called by the core after an unmount or after
170 * an unsuccessful mount to release the memory used by the file system type specific
171 * part of the volume structure.
174 static void fsw_ext4_volume_free(struct fsw_ext4_volume
*vol
)
179 fsw_free(vol
->inotab_bno
);
183 * Get in-depth information on a volume.
186 static fsw_status_t
fsw_ext4_volume_stat(struct fsw_ext4_volume
*vol
, struct fsw_volume_stat
*sb
)
188 sb
->total_bytes
= (fsw_u64
)vol
->sb
->s_blocks_count_lo
* vol
->g
.log_blocksize
;
189 sb
->free_bytes
= (fsw_u64
)vol
->sb
->s_free_blocks_count_lo
* vol
->g
.log_blocksize
;
194 * Get full information on a dnode from disk. This function is called by the core
195 * whenever it needs to access fields in the dnode structure that may not
196 * be filled immediately upon creation of the dnode. In the case of ext4, we
197 * delay fetching of the inode structure until dnode_fill is called. The size and
198 * type fields are invalid until this function has been called.
201 static fsw_status_t
fsw_ext4_dnode_fill(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
)
204 fsw_u32 groupno
, ino_in_group
, ino_bno
, ino_index
;
210 FSW_MSG_DEBUG((FSW_MSGSTR("fsw_ext4_dnode_fill: inode %d\n"), dno
->g
.dnode_id
));
212 // read the inode block
213 groupno
= (dno
->g
.dnode_id
- 1) / vol
->sb
->s_inodes_per_group
;
214 ino_in_group
= (dno
->g
.dnode_id
- 1) % vol
->sb
->s_inodes_per_group
;
215 ino_bno
= vol
->inotab_bno
[groupno
] +
216 ino_in_group
/ (vol
->g
.phys_blocksize
/ vol
->inode_size
);
217 ino_index
= ino_in_group
% (vol
->g
.phys_blocksize
/ vol
->inode_size
);
218 status
= fsw_block_get(vol
, ino_bno
, 2, (void **)&buffer
);
222 // keep our inode around
223 status
= fsw_memdup((void **)&dno
->raw
, buffer
+ ino_index
* vol
->inode_size
, vol
->inode_size
);
224 fsw_block_release(vol
, ino_bno
, buffer
);
228 // get info from the inode
229 dno
->g
.size
= dno
->raw
->i_size_lo
; // TODO: check docs for 64-bit sized files
231 if (S_ISREG(dno
->raw
->i_mode
))
232 dno
->g
.type
= FSW_DNODE_TYPE_FILE
;
233 else if (S_ISDIR(dno
->raw
->i_mode
))
234 dno
->g
.type
= FSW_DNODE_TYPE_DIR
;
235 else if (S_ISLNK(dno
->raw
->i_mode
))
236 dno
->g
.type
= FSW_DNODE_TYPE_SYMLINK
;
238 dno
->g
.type
= FSW_DNODE_TYPE_SPECIAL
;
244 * Free the dnode data structure. Called by the core when deallocating a dnode
245 * structure to release the memory used by the file system type specific part
246 * of the dnode structure.
249 static void fsw_ext4_dnode_free(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
)
256 * Get in-depth information on a dnode. The core makes sure that fsw_ext4_dnode_fill
257 * has been called on the dnode before this function is called. Note that some
258 * data is not directly stored into the structure, but passed to a host-specific
259 * callback that converts it to the host-specific format.
262 static fsw_status_t
fsw_ext4_dnode_stat(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
263 struct fsw_dnode_stat
*sb
)
265 sb
->used_bytes
= dno
->raw
->i_blocks_lo
* EXT4_BLOCK_SIZE(vol
->sb
); // very, very strange...
266 sb
->store_time_posix(sb
, FSW_DNODE_STAT_CTIME
, dno
->raw
->i_ctime
);
267 sb
->store_time_posix(sb
, FSW_DNODE_STAT_ATIME
, dno
->raw
->i_atime
);
268 sb
->store_time_posix(sb
, FSW_DNODE_STAT_MTIME
, dno
->raw
->i_mtime
);
269 sb
->store_attr_posix(sb
, dno
->raw
->i_mode
);
275 * Retrieve file data mapping information. This function is called by the core when
276 * fsw_shandle_read needs to know where on the disk the required piece of the file's
277 * data can be found. The core makes sure that fsw_ext4_dnode_fill has been called
278 * on the dnode before. Our task here is to get the physical disk block number for
279 * the requested logical block number.
282 * The ext2 file system does not use extents, but stores a list of block numbers
283 * using the usual direct, indirect, double-indirect, triple-indirect scheme. To
284 * optimize access, this function checks if the following file blocks are mapped
285 * to consecutive disk blocks and returns a combined extent if possible.
288 static fsw_status_t
fsw_ext4_get_extent(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
289 struct fsw_extent
*extent
)
292 fsw_u32 bno
, release_bno
, buf_bcnt
, file_bcnt
;
296 // Preconditions: The caller has checked that the requested logical block
297 // is within the file's size. The dnode has complete information, i.e.
298 // fsw_ext4_dnode_read_info was called successfully on it.
300 extent
->type
= FSW_EXTENT_TYPE_PHYSBLOCK
;
301 extent
->log_count
= 1;
302 bno
= extent
->log_start
;
304 // try direct block pointers in the inode
305 if (bno
< EXT4_NDIR_BLOCKS
) {
309 bno
-= EXT4_NDIR_BLOCKS
;
311 // try indirect block
312 if (bno
< vol
->ind_bcnt
) {
313 path
[0] = EXT4_IND_BLOCK
;
317 bno
-= vol
->ind_bcnt
;
319 // try double-indirect block
320 if (bno
< vol
->dind_bcnt
) {
321 path
[0] = EXT4_DIND_BLOCK
;
322 path
[1] = bno
/ vol
->ind_bcnt
;
323 path
[2] = bno
% vol
->ind_bcnt
;
326 bno
-= vol
->dind_bcnt
;
328 // use the triple-indirect block
329 path
[0] = EXT4_TIND_BLOCK
;
330 path
[1] = bno
/ vol
->dind_bcnt
;
331 path
[2] = (bno
/ vol
->ind_bcnt
) % vol
->ind_bcnt
;
332 path
[3] = bno
% vol
->ind_bcnt
;
338 // follow the indirection path
339 buffer
= dno
->raw
->i_block
;
340 buf_bcnt
= EXT4_NDIR_BLOCKS
;
343 bno
= buffer
[path
[i
]];
345 extent
->type
= FSW_EXTENT_TYPE_SPARSE
;
347 fsw_block_release(vol
, release_bno
, buffer
);
354 fsw_block_release(vol
, release_bno
, buffer
);
355 status
= fsw_block_get(vol
, bno
, 1, (void **)&buffer
);
359 buf_bcnt
= vol
->ind_bcnt
;
361 extent
->phys_start
= bno
;
363 // check if the following blocks can be aggregated into one extent
364 file_bcnt
= (fsw_u32
)((dno
->g
.size
+ vol
->g
.log_blocksize
- 1) & (vol
->g
.log_blocksize
- 1));
365 while (path
[i
] + extent
->log_count
< buf_bcnt
&& // indirect block has more block pointers
366 extent
->log_start
+ extent
->log_count
< file_bcnt
) { // file has more blocks
367 if (buffer
[path
[i
] + extent
->log_count
] == buffer
[path
[i
] + extent
->log_count
- 1] + 1)
374 fsw_block_release(vol
, release_bno
, buffer
);
379 * Lookup a directory's child dnode by name. This function is called on a directory
380 * to retrieve the directory entry with the given name. A dnode is constructed for
381 * this entry and returned. The core makes sure that fsw_ext4_dnode_fill has been called
382 * and the dnode is actually a directory.
385 static fsw_status_t
fsw_ext4_dir_lookup(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
386 struct fsw_string
*lookup_name
, struct fsw_ext4_dnode
**child_dno_out
)
389 struct fsw_shandle shand
;
391 struct ext4_dir_entry entry
;
392 struct fsw_string entry_name
;
394 // Preconditions: The caller has checked that dno is a directory node.
396 entry_name
.type
= FSW_STRING_TYPE_ISO88591
;
398 // setup handle to read the directory
399 status
= fsw_shandle_open(dno
, &shand
);
403 // scan the directory for the file
405 while (child_ino
== 0) {
407 status
= fsw_ext4_read_dentry(&shand
, &entry
);
410 if (entry
.inode
== 0) {
411 // end of directory reached
412 status
= FSW_NOT_FOUND
;
417 entry_name
.len
= entry_name
.size
= entry
.name_len
;
418 entry_name
.data
= entry
.name
;
419 if (fsw_streq(lookup_name
, &entry_name
)) {
420 child_ino
= entry
.inode
;
425 // setup a dnode for the child item
426 status
= fsw_dnode_create(dno
, child_ino
, FSW_DNODE_TYPE_UNKNOWN
, &entry_name
, child_dno_out
);
429 fsw_shandle_close(&shand
);
434 * Get the next directory entry when reading a directory. This function is called during
435 * directory iteration to retrieve the next directory entry. A dnode is constructed for
436 * the entry and returned. The core makes sure that fsw_ext4_dnode_fill has been called
437 * and the dnode is actually a directory. The shandle provided by the caller is used to
438 * record the position in the directory between calls.
441 static fsw_status_t
fsw_ext4_dir_read(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
442 struct fsw_shandle
*shand
, struct fsw_ext4_dnode
**child_dno_out
)
445 struct ext4_dir_entry entry
;
446 struct fsw_string entry_name
;
448 // Preconditions: The caller has checked that dno is a directory node. The caller
449 // has opened a storage handle to the directory's storage and keeps it around between
454 status
= fsw_ext4_read_dentry(shand
, &entry
);
457 if (entry
.inode
== 0) // end of directory
458 return FSW_NOT_FOUND
;
461 if ((entry
.name_len
== 1 && entry
.name
[0] == '.') ||
462 (entry
.name_len
== 2 && entry
.name
[0] == '.' && entry
.name
[1] == '.'))
468 entry_name
.type
= FSW_STRING_TYPE_ISO88591
;
469 entry_name
.len
= entry_name
.size
= entry
.name_len
;
470 entry_name
.data
= entry
.name
;
472 // setup a dnode for the child item
473 status
= fsw_dnode_create(dno
, entry
.inode
, FSW_DNODE_TYPE_UNKNOWN
, &entry_name
, child_dno_out
);
479 * Read a directory entry from the directory's raw data. This internal function is used
480 * to read a raw ext2 directory entry into memory. The shandle's position pointer is adjusted
481 * to point to the next entry.
484 static fsw_status_t
fsw_ext4_read_dentry(struct fsw_shandle
*shand
, struct ext4_dir_entry
*entry
)
490 // read dir_entry header (fixed length)
492 status
= fsw_shandle_read(shand
, &buffer_size
, entry
);
496 if (buffer_size
< 8 || entry
->rec_len
== 0) {
497 // end of directory reached
501 if (entry
->rec_len
< 8)
502 return FSW_VOLUME_CORRUPTED
;
503 if (entry
->inode
!= 0) {
504 // this entry is used
505 if (entry
->rec_len
< 8 + entry
->name_len
)
506 return FSW_VOLUME_CORRUPTED
;
510 // valid, but unused entry, skip it
511 shand
->pos
+= entry
->rec_len
- 8;
514 // read file name (variable length)
515 buffer_size
= entry
->name_len
;
516 status
= fsw_shandle_read(shand
, &buffer_size
, entry
->name
);
519 if (buffer_size
< entry
->name_len
)
520 return FSW_VOLUME_CORRUPTED
;
522 // skip any remaining padding
523 shand
->pos
+= entry
->rec_len
- (8 + entry
->name_len
);
529 * Get the target path of a symbolic link. This function is called when a symbolic
530 * link needs to be resolved. The core makes sure that the fsw_ext4_dnode_fill has been
531 * called on the dnode and that it really is a symlink.
533 * For ext4, the target path can be stored inline in the inode structure (in the space
534 * otherwise occupied by the block pointers) or in the inode's data. There is no flag
535 * indicating this, only the number of blocks entry (i_blocks) can be used as an
536 * indication. The check used here comes from the Linux kernel.
539 static fsw_status_t
fsw_ext4_readlink(struct fsw_ext4_volume
*vol
, struct fsw_ext4_dnode
*dno
,
540 struct fsw_string
*link_target
)
546 if (dno
->g
.size
> FSW_PATH_MAX
)
547 return FSW_VOLUME_CORRUPTED
;
549 /* Linux kernels ext4_inode_is_fast_symlink... */
550 ea_blocks
= dno
->raw
->i_file_acl_lo
? (vol
->g
.log_blocksize
>> 9) : 0;
552 if (dno
->raw
->i_blocks_lo
- ea_blocks
== 0) {
553 // "fast" symlink, path is stored inside the inode
554 s
.type
= FSW_STRING_TYPE_ISO88591
;
555 s
.size
= s
.len
= (int)dno
->g
.size
;
556 s
.data
= dno
->raw
->i_block
;
557 status
= fsw_strdup_coerce(link_target
, vol
->g
.host_string_type
, &s
);
559 // "slow" symlink, path is stored in normal inode data
560 status
= fsw_dnode_readlink_data(dno
, link_target
);