Lab 連結:Lab: file system
Large files (moderate)
Modify
bmap()so that it implements a doubly-indirect block, in addition to direct blocks and a singly-indirect block. You’ll have to have only 11 direct blocks, rather than 12, to make room for your new doubly-indirect block; you’re not allowed to change the size of an on-disk inode. The first 11 elements ofip->addrs[]should be direct blocks; the 12th should be a singly-indirect block (just like the current one); the 13th should be your new doubly-indirect block. You are done with this exercise whenbigfilewrites 65803 blocks and usertests runs successfully:
這題的策略會是實做出一個 doubly-indirect block
- 不可以更改 on-disk inode
dinode的大小,所以這裡要犧牲一個 direct block 來作為 doubly-indirect block 的用途 ip->addrs[0] ~ ip->addr[10]做為原本的 direct block 的用途- 第 12 個的
ip->addr[11]做為原本的 singly-indirect block (原本的ip->addr[12]) 的用途 - 第 13 個的
ip->addr[12]是新增的,這是 doubly-indirect block
- Make sure you understand
bmap(). Write out a diagram of the relationships betweenip->addrs[], the indirect block, the doubly-indirect block and the singly-indirect blocks it points to, and data blocks. Make sure you understand why adding a doubly-indirect block increases the maximum file size by256*256blocks (really -1, since you have to decrease the number of direct blocks by one).
這個 hint 要我們先理解原版的 bmap()
這在 File System 相關程式碼解析 有分析過了
kernel/fs.c: bmap()
// Inode content
//
// The content (data) associated with each inode is stored
// in blocks on the disk. The first NDIRECT block numbers
// are listed in ip->addrs[]. The next NINDIRECT blocks are
// listed in block ip->addrs[NDIRECT].
// Return the disk block address of the nth block in inode ip.
// If there is no such block, bmap allocates one.
// returns 0 if out of disk space.
static uint
bmap(struct inode *ip, uint bn)
{
uint addr, *a;
struct buf *bp;
// direct 的情形
if(bn < NDIRECT){
// 直接拿第 bn 個 address
// 例如 bn == 0 就拿 ip->addrs[0]
// 一直到第 12 個的 bn == 11 就拿取 ip->addrs[11]
// 現在 addr 拿到了 block address
if((addr = ip->addrs[bn]) == 0){
// 如果 addr 不存在 (addr == 0) balloc 一個新的 block 給 ip->addrs[bn]
addr = balloc(ip->dev);
if(addr == 0)
return 0;
// 原本沒有內容的 ip->addrs[bn] 現在有了一個新的 block
ip->addrs[bn] = addr;
}
// 拿到了 addr 之後就回傳,這也是 bmap() 最重要的任務
// 得知一個 inode 中的第 bn 個 block 的 block address
return addr;
}
// 如果不是 direct mapping,例如想知道第 bn == 12 個 block
// 實際上這是 indirect mapping 中的第 (12 - NDIRECT) == 0 個 block
bn -= NDIRECT;
// bn 照理來說不應該超過 256
if(bn < NINDIRECT){
// Load indirect block, allocating if necessary.
// 先取得圖片中 "indirect" 的那一個 address
// 如果為 0 則一樣 balloc() 一個新的 block 給他
if((addr = ip->addrs[NDIRECT]) == 0){
addr = balloc(ip->dev);
if(addr == 0)
return 0;
ip->addrs[NDIRECT] = addr;
}
// 往下尋找 bmap() 需要回傳的 address
// 要從 addr = ip->addrs[NDIRECT] 的這個 block 中尋找
// 使用 bread() 讀取這個 block 的資料
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
// 還是一樣的套路,沒有資料則 balloc 一個新的 block
// 現在的 addr 就會是 bmap() 要回傳的 address 了
if((addr = a[bn]) == 0){
addr = balloc(ip->dev);
if(addr){
a[bn] = addr;
log_write(bp);
}
}
brelse(bp);
return addr;
}
// 如果不是 direct or indirect, panic()
panic("bmap: out of range");
}
- Think about how you’ll index the doubly-indirect block, and the indirect blocks it points to, with the logical block number.
- If you change the definition of
NDIRECT, you’ll probably have to change the declaration ofaddrs[]instruct inodeinfile.h. Make sure thatstruct inodeandstruct dinodehave the same number of elements in theiraddrs[]arrays.
- If you change the definition of
NDIRECT, make sure to create a newfs.img, sincemkfsusesNDIRECTto build the file system.
更改完 NDIRECT 之後要重新 make 一次
- If your file system gets into a bad state, perhaps by crashing, delete
fs.img(do this from Unix, not xv6). make will build a new clean file system image for you.
如果系統 crash 掉,先 delete fs.img 再重新 make 出一個新的
- Don’t forget to
brelse()each block that youbread().
實做 bmap() 要注意的點
- You should allocate indirect blocks and doubly-indirect blocks only as needed, like the original
bmap().
有需要的時候再使用 balloc() 出新的 block,這個套路在原版的 bmap() 就已經看過了
- Make sure
itruncfrees all blocks of a file, including double-indirect blocks.
這在 File System 相關程式碼解析 有分析過原版的,等一下再加入 doubly-indirect 的部份
// Truncate inode (discard contents).
// Caller must hold ip->lock.
void
itrunc(struct inode *ip)
{
int i, j;
struct buf *bp;
uint *a;
// 刪除 12 個 direct
for(i = 0; i < NDIRECT; i++){
if(ip->addrs[i]){
bfree(ip->dev, ip->addrs[i]);
ip->addrs[i] = 0;
}
}
// 刪除 indirect 的部份
if(ip->addrs[NDIRECT]){
bp = bread(ip->dev, ip->addrs[NDIRECT]);
a = (uint*)bp->data;
// 刪除 indirect block 中 0 ~ 511 的 data block
for(j = 0; j < NINDIRECT; j++){
if(a[j])
bfree(ip->dev, a[j]);
}
brelse(bp);
bfree(ip->dev, ip->addrs[NDIRECT]);
ip->addrs[NDIRECT] = 0;
}
ip->size = 0;
iupdate(ip);
}
userteststakes longer to run than in previous labs because for this labFSSIZEis larger and big files are larger.
先前的 usertests 就已經蠻慢的了,這個 big file 的 lab 又會更慢一點,畢竟 FSSIZE 又更大
程式實做
data structure
kernel/fs.h
#define BSIZE 1024 // block size
#define NDIRECT 12
#define NINDIRECT (BSIZE / sizeof(uint)) // 512
#define MAXFILE (NDIRECT + NINDIRECT)
現在 NDIRECT 改為 11,把多出來的做為 doubly-indirect 用
kernel/fs.h
#define BSIZE 1024 // block size
#define NDIRECT 11
#define NINDIRECT (BSIZE / sizeof(uint)) // 512
#define MAXFILE (NDIRECT + NINDIRECT + NINDIRECT * NINDIRECT)
連帶 struct inode 與 struct dinode 要做更動
kernel/file.h
// in-memory copy of an inode
struct inode {
uint dev; // Device number
uint inum; // Inode number
int ref; // Reference count
struct sleeplock lock; // protects everything below here
int valid; // inode has been read from disk?
short type; // copy of disk inode
short major;
short minor;
short nlink;
uint size;
uint addrs[NDIRECT+2];
};
kernel/fs.h
// On-disk inode structure
struct dinode {
short type; // File type
short major; // Major device number (T_DEVICE only)
short minor; // Minor device number (T_DEVICE only)
short nlink; // Number of links to inode in file system
uint size; // Size of file (bytes)
uint addrs[NDIRECT+2]; // Data block addresses
};
bmap()
static uint
bmap(struct inode *ip, uint bn)
{
uint addr, *a;
struct buf *bp;
if(bn < NDIRECT){
if((addr = ip->addrs[bn]) == 0){
addr = balloc(ip->dev);
if(addr == 0)
return 0;
ip->addrs[bn] = addr;
}
return addr;
}
bn -= NDIRECT;
if(bn < NINDIRECT){
// Load indirect block, allocating if necessary.
if((addr = ip->addrs[NDIRECT]) == 0){
addr = balloc(ip->dev);
if(addr == 0)
return 0;
ip->addrs[NDIRECT] = addr;
}
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
if((addr = a[bn]) == 0){
addr = balloc(ip->dev);
if(addr){
a[bn] = addr;
log_write(bp);
}
}
brelse(bp);
return addr;
}
// 現在 bn 代表的是 doubly-indirect 中的第 bn 個
bn -= NINDIRECT;
// 在這時候先釐清一下
// 在 ip->addrs[] 的層級中,要拿的是 ip->addrs[NDIRECT + 1]
// 在這一個 doubly-indirect block 中總共有 512 個 address
// 這 512 個 address 指向 512 個 indirect block
// 第 0 個 address 指向的第 0 個 indirect block 負責 bn == 0 ~ 511
// 第 1 個 address 指向的第 1 個 indirect block 負責 bn == 512 ~ (512 * 2 - 1)
// 第 n 個 address 指向的第 n 個 indirect block 負責 ((n - 1) * NINDIRECT) <= bn && bn < n * NINDIRECT
// doubly-indirect 中的第 bn 個 block
// 是由 doubly-indirect block 中
// 第 (bn / NINDIRECT) 個 address 指向的第 (bn / NINDIRECT) 個 indirect block 負責
// 在這個 indrect block 中,
// 又是其中的第 (bn % NINDIRECT) 個 address
// 總之
// 1. ip->addrs[NDIRECT + 1] 得知 doubly-indirect block
// 2. doubly-indirect block 中的第 (bn / NINDIRECT) 個 address 可得知 indirect block
// 3. indirect block 中的第 (bn % NINDIRECT) 個 address 是 bmap() 要回傳的 address
if (bn < NINDIRECT * NINDIRECT) {
if ((addr = ip->addrs[NDIRECT + 1]) == 0) {
addr = balloc(ip->dev);
if(addr == 0)
return 0;
ip->addrs[NDIRECT + 1] = addr;
}
// 此時 addr 拿到的是 doubly-indirect block
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
// 現在要拿第 (bn / NINDIRECT) 個 address,
// 可得知 indirect block 的 address
if((addr = a[bn / NINDIRECT]) == 0){
addr = balloc(ip->dev);
if(addr){
a[bn / NINDIRECT] = addr;
log_write(bp);
}
}
brelse(bp);
// 現在這個 addr 是 indirect block 的 address
// 要拿取第 (bn % NINDIRECT) 個 address
bp = bread(ip->dev, addr);
a = (uint*)bp->data;
if ((addr = a[bn % NINDIRECT]) == 0) {
addr = balloc(ip->dev);
if(addr){
a[bn % NINDIRECT] = addr;
log_write(bp);
}
}
brelse(bp);
return addr;
}
panic("bmap: out of range");
}
itrunc()
// Truncate inode (discard contents).
// Caller must hold ip->lock.
void
itrunc(struct inode *ip)
{
int i, j, k;
struct buf *bp;
uint *a;
// for doubly-indirect block
struct buf *bp0, *bp1;
uint *a0, *a1;
for(i = 0; i < NDIRECT; i++){
if(ip->addrs[i]){
bfree(ip->dev, ip->addrs[i]);
ip->addrs[i] = 0;
}
}
if(ip->addrs[NDIRECT]){
bp = bread(ip->dev, ip->addrs[NDIRECT]);
a = (uint*)bp->data;
for(j = 0; j < NINDIRECT; j++){
if(a[j])
bfree(ip->dev, a[j]);
}
brelse(bp);
bfree(ip->dev, ip->addrs[NDIRECT]);
ip->addrs[NDIRECT] = 0;
}
// 處理 doubly-indirect block 的部份
if (ip->addrs[NDIRECT + 1]) {
bp1 = bread(ip->dev, ip->addrs[NDIRECT + 1]);
a1 = (uint*)bp1->data;
// 分別檢查這之中的 512 個 indirect block
for(j = 0; j < NINDIRECT; j++){
if(a1[j]) {
bp0 = bread(ip->dev, a1[j]);
a0 = (uint *) bp0->data;
for (k = 0; k < NINDIRECT; k++) {
if (a0[k])
bfree(ip->dev, a0[k]);
}
brelse(bp0);
bfree(ip->dev, a1[j]);
a1[j] = 0;
}
}
brelse(bp1);
bfree(ip->dev, ip->addrs[NDIRECT + 1]);
ip->addrs[NDIRECT + 1] = 0;
}
ip->size = 0;
iupdate(ip);
}
Symbolic links (moderate)
In this exercise you will add symbolic links to xv6. Symbolic links (or soft links) refer to a linked file by pathname; when a symbolic link is opened, the kernel follows the link to the referred file. Symbolic links resembles hard links, but hard links are restricted to pointing to file on the same disk, while symbolic links can cross disk devices. Although xv6 doesn’t support multiple devices, implementing this system call is a good exercise to understand how pathname lookup works.
hard link 只限縮在同一個 disk,symbolic link (soft link) 則可以跨 disk,這一題要做的是 soft link,雖然 xv6 並沒有支援多個 devices,不過實做這個 system call 可以很好的了解 pathname 的尋找過程
You will implement the
symlink(char *target, char *path)system call, which creates a new symbolic link atpaththat refers to file named bytarget. For further information, see themanpagesymlink. To test, addsymlinktestto theMakefileand run it. Your solution is complete when the tests produce the following output (includingusertestssucceeding).
例如原本有一個檔案 target,在 symlink(char *target, char *path) 之後,path 就會也指向 target 這個檔案
- 現在的策略是把這個
target(或是target的inum) 存放於這個 type 為T_SYMLINK的檔案,在拿到target之後,再用原先的方式把檔案開啟,所以這裡需要先了解有了target(orinum) 之後,原本的檔案開啟流程為何?
- First, create a new system call number for
symlink, add an entry touser/usys.pl,user/user.h, and implement an emptysys_symlinkinkernel/sysfile.c.
user/usys.pl
entry("symlink");
user/user.h
int symlink(char *target, char *path);
kernel/syscall.h
#define SYS_symlink 22
kernel/syscall.c
extern uint64 sys_symlink(void);
static uint64 (*syscalls[])(void) = {
// ...
[SYS_symlink] sys_symlink,
};
kernel/sysfile.c
uint64
sys_symlink(void)
{
// TODO
return 0;
}
- Add a new file type (
T_SYMLINK) tokernel/stat.hto represent a symbolic link.
#define T_DIR 1 // Directory
#define T_FILE 2 // File
#define T_DEVICE 3 // Device
#define T_SYMLINK 4 // Symbolic link
struct stat {
int dev; // File system's disk device
uint ino; // Inode number
short type; // Type of file
short nlink; // Number of links to file
uint64 size; // Size of file in bytes
};
- Add a new flag to
kernel/fcntl.h, (O_NOFOLLOW), that can be used with the open system call. Note that flags passed to open are combined using a bitwise OR operator, so your new flag should not overlap with any existing flags. This will let you compileuser/symlinktest.conce you add it to theMakefile.
例如這個使用情形
user/symlinktest.c
// stat a symbolic link using O_NOFOLLOW
static int
stat_slink(char *pn, struct stat *st)
{
int fd = open(pn, O_RDONLY | O_NOFOLLOW);
if(fd < 0)
return -1;
if(fstat(fd, st) != 0)
return -1;
return 0;
}
使用 flag O_NOFOLLOW,代表的意義為,如果打開了一個 symlink,不要跟著 follow 下去打開這個 link
- Implement the
symlink(target, path)system call to create a new symbolic link atpaththat refers totarget. Note thattargetdoes not need to exist for the system call to succeed. You will need to choose somewhere to store thetargetpath of a symbolic link, for example, in the inode’s data blocks.symlinkshouldreturnan integer representing success (0) or failure (-1) similar tolinkandunlink.
- 需要找個地方存放 the
targetpath of a symbolic link,會像是 inode 中的 data blocks return 0代表 success;return -1代表 failure,這跟linkandunlink一樣- 就算
target不存在,這個 system call 也算是成功- 我想失敗應該會失敗在接下去的 open or 就真的是失敗?
- Modify the open system call to handle the case where the
pathrefers to a symbolic link. If the file does not exist,openmust fail. When a process specifiesO_NOFOLLOWin the flags to open, open should open thesymlink(and not follow the symbolic link).
O_NOFOLLOW的用意在於還是要有一個方法可以打開這個symlink本身
- If the linked file is also a symbolic link, you must recursively follow it until a non-link file is reached. If the links form a cycle, you must return an error code. You may approximate this by returning an error code if the depth of links reaches some threshold (e.g., 10).
symbolic link 也可以指向一個 symbolic link,需要 recursive 的方式解析
- 可以設定一個 threshold,畢竟也有可能發生 circular 的事情發生
- Other system calls (e.g.,
linkandunlink) must not follow symbolic links; these system calls operate on the symbolic link itself.
why?
- You do not have to handle symbolic links to directories for this lab.
- 這個 lab 不需要處理 directories 的 symbolic link
程式實做
kernel/stat.h
#define T_DIR 1 // Directory
#define T_FILE 2 // File
#define T_DEVICE 3 // Device
#define T_SYMLINK 4 // Symbolic link
struct stat {
int dev; // File system's disk device
uint ino; // Inode number
short type; // Type of file
short nlink; // Number of links to file
uint64 size; // Size of file in bytes
};
kernle/fcntl.h: 新增O_NOFOLLOW
#define O_RDONLY 0x000
#define O_WRONLY 0x001
#define O_RDWR 0x002
#define O_CREATE 0x200
#define O_TRUNC 0x400
#define O_NOFOLLOW 0x800
kernel/sysfile.c: sys_symlink()這裡的目的在於把target存放於此T_SYMLINK的 file 中
uint64
sys_symlink(void)
{
char path[MAXPATH], target[MAXPATH];
struct inode *ip;
int n;
if(argstr(0, target, MAXPATH) < 0 || argstr(1, path, MAXPATH) < 0)
return -1;
begin_op();
if ((ip = create(path, T_SYMLINK, 0, 0)) == 0) {
end_op();
return -1;
}
n = writei(ip, 0, (uint64) target, 0, strlen(target));
if (n < strlen(target)) {
ip->nlink = 0;
iupdate(ip);
iunlockput(ip);
end_op();
return -1;
}
iunlockput(ip);
end_op();
return 0;
}
kernel/sysfile.c: sys_open()
uint64
sys_open(void)
{
char path[MAXPATH];
char current_path[MAXPATH];
char next_path[MAXPATH];
int fd, omode;
struct file *f;
struct inode *ip;
int n;
int depth;
argint(1, &omode);
if((n = argstr(0, path, MAXPATH)) < 0)
return -1;
begin_op();
if(omode & O_CREATE){
ip = create(path, T_FILE, 0, 0);
if(ip == 0){
end_op();
return -1;
}
} else {
strncpy(current_path, path, MAXPATH);
depth = 0;
while (1) {
if((ip = namei(current_path)) == 0){
end_op();
return -1;
}
ilock(ip);
if (ip->type != T_SYMLINK)
break;
if (omode & O_NOFOLLOW)
break;
if (++depth > 10) {
iunlockput(ip);
end_op();
return -1;
}
// read the `target`
memset(next_path, 0, MAXPATH);
n = readi(ip, 0, (uint64) next_path, 0, sizeof(next_path) - 1);
iunlockput(ip);
end_op();
if (n <= 0)
return -1;
strncpy(current_path, next_path, MAXPATH);
begin_op();
}
if(ip->type == T_DIR && omode != O_RDONLY){
iunlockput(ip);
end_op();
return -1;
}
}
if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){
iunlockput(ip);
end_op();
return -1;
}
if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){
if(f)
fileclose(f);
iunlockput(ip);
end_op();
return -1;
}
if(ip->type == T_DEVICE){
f->type = FD_DEVICE;
f->major = ip->major;
} else {
f->type = FD_INODE;
f->off = 0;
}
f->ip = ip;
f->readable = !(omode & O_WRONLY);
f->writable = (omode & O_WRONLY) || (omode & O_RDWR);
if((omode & O_TRUNC) && ip->type == T_FILE){
itrunc(ip);
}
iunlock(ip);
end_op();
return fd;
}

心得
這個 lab 對我來說的課題是抓重點程式碼,其實不必了解到所有到底層的流程,更多的時候了解各個主要功能 function 之間的互動會更重要一些