// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fuseops

import (
	"os"
	"time"
)

////////////////////////////////////////////////////////////////////////
// File system
////////////////////////////////////////////////////////////////////////

  22. // Return statistics about the file system's capacity and available resources.
  23. //
  24. // Called by statfs(2) and friends:
  25. //
  26. // * (https://goo.gl/Xi1lDr) sys_statfs called user_statfs, which calls
  27. // vfs_statfs, which calls statfs_by_dentry.
  28. //
  29. // * (https://goo.gl/VAIOwU) statfs_by_dentry calls the superblock
  30. // operation statfs, which in our case points at
  31. // fuse_statfs (cf. https://goo.gl/L7BTM3)
  32. //
  33. // * (https://goo.gl/Zn7Sgl) fuse_statfs sends a statfs op, then uses
  34. // convert_fuse_statfs to convert the response in a straightforward
  35. // manner.
  36. //
  37. // This op is particularly important on OS X: if you don't implement it, the
  38. // file system will not successfully mount. If you don't model a sane amount of
  39. // free space, the Finder will refuse to copy files into the file system.
  40. type StatFSOp struct {
  41. // The size of the file system's blocks. This may be used, in combination
  42. // with the block counts below, by callers of statfs(2) to infer the file
  43. // system's capacity and space availability.
  44. //
  45. // On Linux this is surfaced as statfs::f_frsize, matching the posix standard
  46. // (http://goo.gl/LktgrF), which says that f_blocks and friends are in units
  47. // of f_frsize. On OS X this is surfaced as statfs::f_bsize, which plays the
  48. // same roll.
  49. //
  50. // It appears as though the original intent of statvfs::f_frsize in the posix
  51. // standard was to support a smaller addressable unit than statvfs::f_bsize
  52. // (cf. The Linux Programming Interface by Michael Kerrisk,
  53. // https://goo.gl/5LZMxQ). Therefore users should probably arrange for this
  54. // to be no larger than IoSize.
  55. //
  56. // On Linux this can be any value, and will be faithfully returned to the
  57. // caller of statfs(2) (see the code walk above). On OS X it appears that
  58. // only powers of 2 in the range [2^7, 2^20] are preserved, and a value of
  59. // zero is treated as 4096.
  60. //
  61. // This interface does not distinguish between blocks and block fragments.
  62. BlockSize uint32
  63. // The total number of blocks in the file system, the number of unused
  64. // blocks, and the count of the latter that are available for use by non-root
  65. // users.
  66. //
  67. // For each category, the corresponding number of bytes is derived by
  68. // multiplying by BlockSize.
  69. Blocks uint64
  70. BlocksFree uint64
  71. BlocksAvailable uint64
  72. // The preferred size of writes to and reads from the file system, in bytes.
  73. // This may affect clients that use statfs(2) to size buffers correctly. It
  74. // does not appear to influence the size of writes sent from the kernel to
  75. // the file system daemon.
  76. //
  77. // On Linux this is surfaced as statfs::f_bsize, and on OS X as
  78. // statfs::f_iosize. Both are documented in `man 2 statfs` as "optimal
  79. // transfer block size".
  80. //
  81. // On Linux this can be any value. On OS X it appears that only powers of 2
  82. // in the range [2^12, 2^25] are faithfully preserved, and a value of zero is
  83. // treated as 65536.
  84. IoSize uint32
  85. // The total number of inodes in the file system, and how many remain free.
  86. Inodes uint64
  87. InodesFree uint64
  88. }
////////////////////////////////////////////////////////////////////////
// Inodes
////////////////////////////////////////////////////////////////////////

  92. // Look up a child by name within a parent directory. The kernel sends this
  93. // when resolving user paths to dentry structs, which are then cached.
  94. type LookUpInodeOp struct {
  95. // The ID of the directory inode to which the child belongs.
  96. Parent InodeID
  97. // The name of the child of interest, relative to the parent. For example, in
  98. // this directory structure:
  99. //
  100. // foo/
  101. // bar/
  102. // baz
  103. //
  104. // the file system may receive a request to look up the child named "bar" for
  105. // the parent foo/.
  106. Name string
  107. // The resulting entry. Must be filled out by the file system.
  108. //
  109. // The lookup count for the inode is implicitly incremented. See notes on
  110. // ForgetInodeOp for more information.
  111. Entry ChildInodeEntry
  112. }
  113. // Refresh the attributes for an inode whose ID was previously returned in a
  114. // LookUpInodeOp. The kernel sends this when the FUSE VFS layer's cache of
  115. // inode attributes is stale. This is controlled by the AttributesExpiration
  116. // field of ChildInodeEntry, etc.
  117. type GetInodeAttributesOp struct {
  118. // The inode of interest.
  119. Inode InodeID
  120. // Set by the file system: attributes for the inode, and the time at which
  121. // they should expire. See notes on ChildInodeEntry.AttributesExpiration for
  122. // more.
  123. Attributes InodeAttributes
  124. AttributesExpiration time.Time
  125. }
  126. // Change attributes for an inode.
  127. //
  128. // The kernel sends this for obvious cases like chmod(2), and for less obvious
  129. // cases like ftrunctate(2).
  130. type SetInodeAttributesOp struct {
  131. // The inode of interest.
  132. Inode InodeID
  133. // The attributes to modify, or nil for attributes that don't need a change.
  134. Size *uint64
  135. Mode *os.FileMode
  136. Atime *time.Time
  137. Mtime *time.Time
  138. // Set by the file system: the new attributes for the inode, and the time at
  139. // which they should expire. See notes on
  140. // ChildInodeEntry.AttributesExpiration for more.
  141. Attributes InodeAttributes
  142. AttributesExpiration time.Time
  143. }
  144. // Decrement the reference count for an inode ID previously issued by the file
  145. // system.
  146. //
  147. // The comments for the ops that implicitly increment the reference count
  148. // contain a note of this (but see also the note about the root inode below).
  149. // For example, LookUpInodeOp and MkDirOp. The authoritative source is the
  150. // libfuse documentation, which states that any op that returns
  151. // fuse_reply_entry fuse_reply_create implicitly increments (cf.
  152. // http://goo.gl/o5C7Dx).
  153. //
  154. // If the reference count hits zero, the file system can forget about that ID
  155. // entirely, and even re-use it in future responses. The kernel guarantees that
  156. // it will not otherwise use it again.
  157. //
  158. // The reference count corresponds to fuse_inode::nlookup
  159. // (http://goo.gl/ut48S4). Some examples of where the kernel manipulates it:
  160. //
  161. // * (http://goo.gl/vPD9Oh) Any caller to fuse_iget increases the count.
  162. // * (http://goo.gl/B6tTTC) fuse_lookup_name calls fuse_iget.
  163. // * (http://goo.gl/IlcxWv) fuse_create_open calls fuse_iget.
  164. // * (http://goo.gl/VQMQul) fuse_dentry_revalidate increments after
  165. // revalidating.
  166. //
  167. // In contrast to all other inodes, RootInodeID begins with an implicit
  168. // lookup count of one, without a corresponding op to increase it. (There
  169. // could be no such op, because the root cannot be referred to by name.) Code
  170. // walk:
  171. //
  172. // * (http://goo.gl/gWAheU) fuse_fill_super calls fuse_get_root_inode.
  173. //
  174. // * (http://goo.gl/AoLsbb) fuse_get_root_inode calls fuse_iget without
  175. // sending any particular request.
  176. //
  177. // * (http://goo.gl/vPD9Oh) fuse_iget increments nlookup.
  178. //
  179. // File systems should tolerate but not rely on receiving forget ops for
  180. // remaining inodes when the file system unmounts, including the root inode.
  181. // Rather they should take fuse.Connection.ReadOp returning io.EOF as
  182. // implicitly decrementing all lookup counts to zero.
  183. type ForgetInodeOp struct {
  184. // The inode whose reference count should be decremented.
  185. Inode InodeID
  186. // The amount to decrement the reference count.
  187. N uint64
  188. }
////////////////////////////////////////////////////////////////////////
// Inode creation
////////////////////////////////////////////////////////////////////////

  192. // Create a directory inode as a child of an existing directory inode. The
  193. // kernel sends this in response to a mkdir(2) call.
  194. //
  195. // The Linux kernel appears to verify the name doesn't already exist (mkdir
  196. // calls mkdirat calls user_path_create calls filename_create, which verifies:
  197. // http://goo.gl/FZpLu5). Indeed, the tests in samples/memfs that call in
  198. // parallel appear to bear this out. But osxfuse does not appear to guarantee
  199. // this (cf. https://goo.gl/PqzZDv). And if names may be created outside of the
  200. // kernel's control, it doesn't matter what the kernel does anyway.
  201. //
  202. // Therefore the file system should return EEXIST if the name already exists.
  203. type MkDirOp struct {
  204. // The ID of parent directory inode within which to create the child.
  205. Parent InodeID
  206. // The name of the child to create, and the mode with which to create it.
  207. Name string
  208. Mode os.FileMode
  209. // Set by the file system: information about the inode that was created.
  210. //
  211. // The lookup count for the inode is implicitly incremented. See notes on
  212. // ForgetInodeOp for more information.
  213. Entry ChildInodeEntry
  214. }
  215. // Create a file inode as a child of an existing directory inode. The kernel
  216. // sends this in response to a mknod(2) call. It may also send it in special
  217. // cases such as an NFS export (cf. https://goo.gl/HiLfnK). It is more typical
  218. // to see CreateFileOp, which is received for an open(2) that creates a file.
  219. //
  220. // The Linux kernel appears to verify the name doesn't already exist (mknod
  221. // calls sys_mknodat calls user_path_create calls filename_create, which
  222. // verifies: http://goo.gl/FZpLu5). But osxfuse may not guarantee this, as with
  223. // mkdir(2). And if names may be created outside of the kernel's control, it
  224. // doesn't matter what the kernel does anyway.
  225. //
  226. // Therefore the file system should return EEXIST if the name already exists.
  227. type MkNodeOp struct {
  228. // The ID of parent directory inode within which to create the child.
  229. Parent InodeID
  230. // The name of the child to create, and the mode with which to create it.
  231. Name string
  232. Mode os.FileMode
  233. // Set by the file system: information about the inode that was created.
  234. //
  235. // The lookup count for the inode is implicitly incremented. See notes on
  236. // ForgetInodeOp for more information.
  237. Entry ChildInodeEntry
  238. }
  239. // Create a file inode and open it.
  240. //
  241. // The kernel sends this when the user asks to open a file with the O_CREAT
  242. // flag and the kernel has observed that the file doesn't exist. (See for
  243. // example lookup_open, http://goo.gl/PlqE9d). However, osxfuse doesn't appear
  244. // to make this check atomically (cf. https://goo.gl/PqzZDv). And if names may
  245. // be created outside of the kernel's control, it doesn't matter what the
  246. // kernel does anyway.
  247. //
  248. // Therefore the file system should return EEXIST if the name already exists.
  249. type CreateFileOp struct {
  250. // The ID of parent directory inode within which to create the child file.
  251. Parent InodeID
  252. // The name of the child to create, and the mode with which to create it.
  253. Name string
  254. Mode os.FileMode
  255. // Set by the file system: information about the inode that was created.
  256. //
  257. // The lookup count for the inode is implicitly incremented. See notes on
  258. // ForgetInodeOp for more information.
  259. Entry ChildInodeEntry
  260. // Set by the file system: an opaque ID that will be echoed in follow-up
  261. // calls for this file using the same struct file in the kernel. In practice
  262. // this usually means follow-up calls using the file descriptor returned by
  263. // open(2).
  264. //
  265. // The handle may be supplied in future ops like ReadFileOp that contain a
  266. // file handle. The file system must ensure this ID remains valid until a
  267. // later call to ReleaseFileHandle.
  268. Handle HandleID
  269. }
  270. // Create a symlink inode. If the name already exists, the file system should
  271. // return EEXIST (cf. the notes on CreateFileOp and MkDirOp).
  272. type CreateSymlinkOp struct {
  273. // The ID of parent directory inode within which to create the child symlink.
  274. Parent InodeID
  275. // The name of the symlink to create.
  276. Name string
  277. // The target of the symlink.
  278. Target string
  279. // Set by the file system: information about the symlink inode that was
  280. // created.
  281. //
  282. // The lookup count for the inode is implicitly incremented. See notes on
  283. // ForgetInodeOp for more information.
  284. Entry ChildInodeEntry
  285. }
////////////////////////////////////////////////////////////////////////
// Unlinking
////////////////////////////////////////////////////////////////////////

  289. // Rename a file or directory, given the IDs of the original parent directory
  290. // and the new one (which may be the same).
  291. //
  292. // In Linux, this is called by vfs_rename (https://goo.gl/eERItT), which is
  293. // called by sys_renameat2 (https://goo.gl/fCC9qC).
  294. //
  295. // The kernel takes care of ensuring that the source and destination are not
  296. // identical (in which case it does nothing), that the rename is not across
  297. // file system boundaries, and that the destination doesn't already exist with
  298. // the wrong type. Some subtleties that the file system must care about:
  299. //
  300. // * If the new name is an existing directory, the file system must ensure it
  301. // is empty before replacing it, returning ENOTEMPTY otherwise. (This is
  302. // per the posix spec: http://goo.gl/4XtT79)
  303. //
  304. // * The rename must be atomic from the point of view of an observer of the
  305. // new name. That is, if the new name already exists, there must be no
  306. // point at which it doesn't exist.
  307. //
  308. // * It is okay for the new name to be modified before the old name is
  309. // removed; these need not be atomic. In fact, the Linux man page
  310. // explicitly says this is likely (cf. https://goo.gl/Y1wVZc).
  311. //
  312. // * Linux bends over backwards (https://goo.gl/pLDn3r) to ensure that
  313. // neither the old nor the new parent can be concurrently modified. But
  314. // it's not clear whether OS X does this, and in any case it doesn't matter
  315. // for file systems that may be modified remotely. Therefore a careful file
  316. // system implementor should probably ensure if possible that the unlink
  317. // step in the "link new name, unlink old name" process doesn't unlink a
  318. // different inode than the one that was linked to the new name. Still,
  319. // posix and the man pages are imprecise about the actual semantics of a
  320. // rename if it's not atomic, so it is probably not disastrous to be loose
  321. // about this.
  322. //
  323. type RenameOp struct {
  324. // The old parent directory, and the name of the entry within it to be
  325. // relocated.
  326. OldParent InodeID
  327. OldName string
  328. // The new parent directory, and the name of the entry to be created or
  329. // overwritten within it.
  330. NewParent InodeID
  331. NewName string
  332. }
  333. // Unlink a directory from its parent. Because directories cannot have a link
  334. // count above one, this means the directory inode should be deleted as well
  335. // once the kernel sends ForgetInodeOp.
  336. //
  337. // The file system is responsible for checking that the directory is empty.
  338. //
  339. // Sample implementation in ext2: ext2_rmdir (http://goo.gl/B9QmFf)
  340. type RmDirOp struct {
  341. // The ID of parent directory inode, and the name of the directory being
  342. // removed within it.
  343. Parent InodeID
  344. Name string
  345. }
  346. // Unlink a file or symlink from its parent. If this brings the inode's link
  347. // count to zero, the inode should be deleted once the kernel sends
  348. // ForgetInodeOp. It may still be referenced before then if a user still has
  349. // the file open.
  350. //
  351. // Sample implementation in ext2: ext2_unlink (http://goo.gl/hY6r6C)
  352. type UnlinkOp struct {
  353. // The ID of parent directory inode, and the name of the entry being removed
  354. // within it.
  355. Parent InodeID
  356. Name string
  357. }
////////////////////////////////////////////////////////////////////////
// Directory handles
////////////////////////////////////////////////////////////////////////

  361. // Open a directory inode.
  362. //
  363. // On Linux the sends this when setting up a struct file for a particular inode
  364. // with type directory, usually in response to an open(2) call from a
  365. // user-space process. On OS X it may not be sent for every open(2) (cf.
  366. // https://github.com/osxfuse/osxfuse/issues/199).
  367. type OpenDirOp struct {
  368. // The ID of the inode to be opened.
  369. Inode InodeID
  370. // Set by the file system: an opaque ID that will be echoed in follow-up
  371. // calls for this directory using the same struct file in the kernel. In
  372. // practice this usually means follow-up calls using the file descriptor
  373. // returned by open(2).
  374. //
  375. // The handle may be supplied in future ops like ReadDirOp that contain a
  376. // directory handle. The file system must ensure this ID remains valid until
  377. // a later call to ReleaseDirHandle.
  378. Handle HandleID
  379. }
  380. // Read entries from a directory previously opened with OpenDir.
  381. type ReadDirOp struct {
  382. // The directory inode that we are reading, and the handle previously
  383. // returned by OpenDir when opening that inode.
  384. Inode InodeID
  385. Handle HandleID
  386. // The offset within the directory at which to read.
  387. //
  388. // Warning: this field is not necessarily a count of bytes. Its legal values
  389. // are defined by the results returned in ReadDirResponse. See the notes
  390. // below and the notes on that struct.
  391. //
  392. // In the Linux kernel this ultimately comes from file::f_pos, which starts
  393. // at zero and is set by llseek and by the final consumed result returned by
  394. // each call to ReadDir:
  395. //
  396. // * (http://goo.gl/2nWJPL) iterate_dir, which is called by getdents(2) and
  397. // readdir(2), sets dir_context::pos to file::f_pos before calling
  398. // f_op->iterate, and then does the opposite assignment afterward.
  399. //
  400. // * (http://goo.gl/rTQVSL) fuse_readdir, which implements iterate for fuse
  401. // directories, passes dir_context::pos as the offset to fuse_read_fill,
  402. // which passes it on to user-space. fuse_readdir later calls
  403. // parse_dirfile with the same context.
  404. //
  405. // * (http://goo.gl/vU5ukv) For each returned result (except perhaps the
  406. // last, which may be truncated by the page boundary), parse_dirfile
  407. // updates dir_context::pos with fuse_dirent::off.
  408. //
  409. // It is affected by the Posix directory stream interfaces in the following
  410. // manner:
  411. //
  412. // * (http://goo.gl/fQhbyn, http://goo.gl/ns1kDF) opendir initially causes
  413. // filepos to be set to zero.
  414. //
  415. // * (http://goo.gl/ezNKyR, http://goo.gl/xOmDv0) readdir allows the user
  416. // to iterate through the directory one entry at a time. As each entry is
  417. // consumed, its d_off field is stored in __dirstream::filepos.
  418. //
  419. // * (http://goo.gl/WEOXG8, http://goo.gl/rjSXl3) telldir allows the user
  420. // to obtain the d_off field from the most recently returned entry.
  421. //
  422. // * (http://goo.gl/WG3nDZ, http://goo.gl/Lp0U6W) seekdir allows the user
  423. // to seek backward to an offset previously returned by telldir. It
  424. // stores the new offset in filepos, and calls llseek to update the
  425. // kernel's struct file.
  426. //
  427. // * (http://goo.gl/gONQhz, http://goo.gl/VlrQkc) rewinddir allows the user
  428. // to go back to the beginning of the directory, obtaining a fresh view.
  429. // It updates filepos and calls llseek to update the kernel's struct
  430. // file.
  431. //
  432. // Unfortunately, FUSE offers no way to intercept seeks
  433. // (http://goo.gl/H6gEXa), so there is no way to cause seekdir or rewinddir
  434. // to fail. Additionally, there is no way to distinguish an explicit
  435. // rewinddir followed by readdir from the initial readdir, or a rewinddir
  436. // from a seekdir to the value returned by telldir just after opendir.
  437. //
  438. // Luckily, Posix is vague about what the user will see if they seek
  439. // backwards, and requires the user not to seek to an old offset after a
  440. // rewind. The only requirement on freshness is that rewinddir results in
  441. // something that looks like a newly-opened directory. So FUSE file systems
  442. // may e.g. cache an entire fresh listing for each ReadDir with a zero
  443. // offset, and return array offsets into that cached listing.
  444. Offset DirOffset
  445. // The destination buffer, whose length gives the size of the read.
  446. //
  447. // The output data should consist of a sequence of FUSE directory entries in
  448. // the format generated by fuse_add_direntry (http://goo.gl/qCcHCV), which is
  449. // consumed by parse_dirfile (http://goo.gl/2WUmD2). Use fuseutil.WriteDirent
  450. // to generate this data.
  451. //
  452. // Each entry returned exposes a directory offset to the user that may later
  453. // show up in ReadDirRequest.Offset. See notes on that field for more
  454. // information.
  455. Dst []byte
  456. // Set by the file system: the number of bytes read into Dst.
  457. //
  458. // It is okay for this to be less than len(Dst) if there are not enough
  459. // entries available or the final entry would not fit.
  460. //
  461. // Zero means that the end of the directory has been reached. This is
  462. // unambiguous because NAME_MAX (https://goo.gl/ZxzKaE) plus the size of
  463. // fuse_dirent (https://goo.gl/WO8s3F) plus the 8-byte alignment of
  464. // FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH) is less than the read size of
  465. // PAGE_SIZE used by fuse_readdir (cf. https://goo.gl/VajtS2).
  466. BytesRead int
  467. }
  468. // Release a previously-minted directory handle. The kernel sends this when
  469. // there are no more references to an open directory: all file descriptors are
  470. // closed and all memory mappings are unmapped.
  471. //
  472. // The kernel guarantees that the handle ID will not be used in further ops
  473. // sent to the file system (unless it is reissued by the file system).
  474. //
  475. // Errors from this op are ignored by the kernel (cf. http://goo.gl/RL38Do).
  476. type ReleaseDirHandleOp struct {
  477. // The handle ID to be released. The kernel guarantees that this ID will not
  478. // be used in further calls to the file system (unless it is reissued by the
  479. // file system).
  480. Handle HandleID
  481. }
////////////////////////////////////////////////////////////////////////
// File handles
////////////////////////////////////////////////////////////////////////

  485. // Open a file inode.
  486. //
  487. // On Linux the sends this when setting up a struct file for a particular inode
  488. // with type file, usually in response to an open(2) call from a user-space
  489. // process. On OS X it may not be sent for every open(2)
  490. // (cf.https://github.com/osxfuse/osxfuse/issues/199).
  491. type OpenFileOp struct {
  492. // The ID of the inode to be opened.
  493. Inode InodeID
  494. // An opaque ID that will be echoed in follow-up calls for this file using
  495. // the same struct file in the kernel. In practice this usually means
  496. // follow-up calls using the file descriptor returned by open(2).
  497. //
  498. // The handle may be supplied in future ops like ReadFileOp that contain a
  499. // file handle. The file system must ensure this ID remains valid until a
  500. // later call to ReleaseFileHandle.
  501. Handle HandleID
  502. // By default, fuse invalidates the kernel's page cache for an inode when a
  503. // new file handle is opened for that inode (cf. https://goo.gl/2rZ9uk). The
  504. // intent appears to be to allow users to "see" content that has changed
  505. // remotely on a networked file system by re-opening the file.
  506. //
  507. // For file systems where this is not a concern because all modifications for
  508. // a particular inode go through the kernel, set this field to true to
  509. // disable this behavior.
  510. //
  511. // (More discussion: http://goo.gl/cafzWF)
  512. //
  513. // Note that on OS X it appears that the behavior is always as if this field
  514. // is set to true, regardless of its value, at least for files opened in the
  515. // same mode. (Cf. https://github.com/osxfuse/osxfuse/issues/223)
  516. KeepPageCache bool
  517. // Whether to use direct IO for this file handle. By default, the kernel
  518. // suppresses what it sees as redundant operations (including reads beyond
  519. // the precomputed EOF).
  520. //
  521. // Enabling direct IO ensures that all client operations reach the fuse
  522. // layer. This allows for filesystems whose file sizes are not known in
  523. // advance, for example, because contents are generated on the fly.
  524. UseDirectIO bool
  525. }
  526. // Read data from a file previously opened with CreateFile or OpenFile.
  527. //
  528. // Note that this op is not sent for every call to read(2) by the end user;
  529. // some reads may be served by the page cache. See notes on WriteFileOp for
  530. // more.
  531. type ReadFileOp struct {
  532. // The file inode that we are reading, and the handle previously returned by
  533. // CreateFile or OpenFile when opening that inode.
  534. Inode InodeID
  535. Handle HandleID
  536. // The offset within the file at which to read.
  537. Offset int64
  538. // The destination buffer, whose length gives the size of the read.
  539. Dst []byte
  540. // Set by the file system: the number of bytes read.
  541. //
  542. // The FUSE documentation requires that exactly the requested number of bytes
  543. // be returned, except in the case of EOF or error (http://goo.gl/ZgfBkF).
  544. // This appears to be because it uses file mmapping machinery
  545. // (http://goo.gl/SGxnaN) to read a page at a time. It appears to understand
  546. // where EOF is by checking the inode size (http://goo.gl/0BkqKD), returned
  547. // by a previous call to LookUpInode, GetInodeAttributes, etc.
  548. //
  549. // If direct IO is enabled, semantics should match those of read(2).
  550. BytesRead int
  551. }
// Write data to a file previously opened with CreateFile or OpenFile.
//
// When the user writes data using write(2), the write goes into the page
// cache and the page is marked dirty. Later the kernel may write back the
// page via the FUSE VFS layer, causing this op to be sent:
//
//  *  The kernel calls address_space_operations::writepage when a dirty page
//     needs to be written to backing store (cf. http://goo.gl/Ezbewg). Fuse
//     sets this to fuse_writepage (cf. http://goo.gl/IeNvLT).
//
//  *  (http://goo.gl/Eestuy) fuse_writepage calls fuse_writepage_locked.
//
//  *  (http://goo.gl/RqYIxY) fuse_writepage_locked makes a write request to
//     the userspace server.
//
// Note that the kernel *will* ensure that writes are received and acknowledged
// by the file system before sending a FlushFileOp when closing the file
// descriptor to which they were written. Cf. the notes on
// fuse.MountConfig.DisableWritebackCaching.
//
// (See also http://goo.gl/ocdTdM, fuse-devel thread "Fuse guarantees on
// concurrent requests".)
type WriteFileOp struct {
	// The file inode that we are modifying, and the handle previously returned
	// by CreateFile or OpenFile when opening that inode.
	Inode  InodeID
	Handle HandleID

	// The offset at which to write the data below.
	//
	// The man page for pwrite(2) implies that aside from changing the file
	// handle's offset, using pwrite is equivalent to using lseek(2) and then
	// write(2). The man page for lseek(2) says the following:
	//
	// "The lseek() function allows the file offset to be set beyond the end of
	// the file (but this does not change the size of the file). If data is later
	// written at this point, subsequent reads of the data in the gap (a "hole")
	// return null bytes ('\0') until data is actually written into the gap."
	//
	// It is therefore reasonable to assume that the kernel is looking for
	// the following semantics:
	//
	//  *  If the offset is less than or equal to the current size, extend the
	//     file as necessary to fit any data that goes past the end of the file.
	//
	//  *  If the offset is greater than the current size, extend the file
	//     with null bytes until it is not, then do the above.
	Offset int64

	// The data to write.
	//
	// The FUSE documentation requires that exactly the number of bytes supplied
	// be written, except on error (http://goo.gl/KUpwwn). This appears to be
	// because it uses file mmapping machinery (http://goo.gl/SGxnaN) to write a
	// page at a time.
	Data []byte
}
// Synchronize the current contents of an open file to storage.
//
// vfs.txt documents this as being called by the fsync(2) system call
// (cf. http://goo.gl/j9X8nB). Code walk for that case:
//
//  *  (http://goo.gl/IQkWZa) sys_fsync calls do_fsync, calls vfs_fsync, calls
//     vfs_fsync_range.
//
//  *  (http://goo.gl/5L2SMy) vfs_fsync_range calls f_op->fsync.
//
// Note that this is also sent by fdatasync(2) (cf. http://goo.gl/01R7rF), and
// may be sent for msync(2) with the MS_SYNC flag (see the notes on
// FlushFileOp).
//
// See also: FlushFileOp, which may perform a similar function when closing a
// file (but which is not used in "real" file systems).
type SyncFileOp struct {
	// The file inode being sync'd, and the handle through which the sync was
	// requested.
	Inode  InodeID
	Handle HandleID
}
// Flush the current state of an open file to storage upon closing a file
// descriptor.
//
// vfs.txt documents this as being sent for each close(2) system call (cf.
// http://goo.gl/FSkbrq). Code walk for that case:
//
//  *  (http://goo.gl/e3lv0e) sys_close calls __close_fd, calls filp_close.
//  *  (http://goo.gl/nI8fxD) filp_close calls f_op->flush (fuse_flush).
//
// But note that this is also sent in other contexts where a file descriptor is
// closed, such as dup2(2) (cf. http://goo.gl/NQDvFS). In the case of close(2),
// a flush error is returned to the user. For dup2(2), it is not.
//
// One potentially significant case where this may not be sent is mmap'd files,
// where the behavior is complicated:
//
//  *  munmap(2) does not cause flushes (cf. http://goo.gl/j8B9g0).
//
//  *  On OS X, if a user modifies a mapped file via the mapping before
//     closing the file with close(2), the WriteFileOps for the modifications
//     may not be received before the FlushFileOp for the close(2) (cf.
//     https://github.com/osxfuse/osxfuse/issues/202). It appears that this may
//     be fixed in osxfuse 3 (cf. https://goo.gl/rtvbko).
//
//  *  However, you can safely arrange for writes via a mapping to be
//     flushed by calling msync(2) followed by close(2). On OS X msync(2)
//     will cause WriteFileOps to go through and close(2) will cause a
//     FlushFileOp as usual (cf. http://goo.gl/kVmNcx). On Linux, msync(2) does
//     nothing unless you set the MS_SYNC flag, in which case it causes a
//     SyncFileOp to be sent (cf. http://goo.gl/P3mErk).
//
// In summary: if you make data durable in both FlushFile and SyncFile, then
// your users can get safe behavior from mapped files on both operating systems
// by calling msync(2) with MS_SYNC, followed by munmap(2), followed by
// close(2). On Linux, the msync(2) is optional (cf. http://goo.gl/EIhAxv and
// the notes on WriteFileOp).
//
// Because of cases like dup2(2), FlushFileOps are not necessarily one to one
// with OpenFileOps. They should not be used for reference counting, and the
// handle must remain valid even after the flush op is received (use
// ReleaseFileHandleOp for disposing of it).
//
// Typical "real" file systems do not implement this, presumably relying on
// the kernel to write out the page cache to the block device eventually.
// They can get away with this because a later open(2) will see the same
// data. A file system that writes to remote storage however probably wants
// to at least schedule a real flush, and maybe do it immediately in order to
// return any errors that occur.
type FlushFileOp struct {
	// The file inode being flushed, and the handle for the file descriptor
	// being closed. The handle stays valid after this op (see above).
	Inode  InodeID
	Handle HandleID
}
// Release a previously-minted file handle. The kernel sends this when there
// are no more references to an open file: all file descriptors are closed
// and all memory mappings are unmapped.
//
// The kernel guarantees that the handle ID will not be used in further calls
// to the file system (unless it is reissued by the file system).
//
// Errors from this op are ignored by the kernel (cf. http://goo.gl/RL38Do).
type ReleaseFileHandleOp struct {
	// The handle ID to be released. The kernel guarantees that this ID will not
	// be used in further calls to the file system (unless it is reissued by the
	// file system).
	Handle HandleID
}
  696. ////////////////////////////////////////////////////////////////////////
  697. // Reading symlinks
  698. ////////////////////////////////////////////////////////////////////////
// Read the target of a symlink inode.
type ReadSymlinkOp struct {
	// The symlink inode that we are reading.
	Inode InodeID

	// Set by the file system: the target of the symlink.
	Target string
}
  706. ////////////////////////////////////////////////////////////////////////
  707. // eXtended attributes
  708. ////////////////////////////////////////////////////////////////////////
// Remove an extended attribute.
//
// This is sent in response to removexattr(2). The file system should return
// ENOATTR if the extended attribute does not exist.
type RemoveXattrOp struct {
	// The inode that we are removing an extended attribute from.
	Inode InodeID

	// The name of the extended attribute.
	Name string
}
// Get an extended attribute.
//
// This is sent in response to getxattr(2). The file system should return
// ENOATTR if the extended attribute does not exist.
type GetXattrOp struct {
	// The inode whose extended attribute we are reading.
	Inode InodeID

	// The name of the extended attribute.
	Name string

	// The destination buffer. If it is too small to hold the value, the
	// file system should return ERANGE.
	Dst []byte

	// Set by the file system: the number of bytes read into Dst, or
	// the number of bytes that would have been read into Dst if Dst were
	// big enough (return ERANGE in this case).
	BytesRead int
}
// List all the extended attributes for a file.
//
// This is sent in response to listxattr(2).
type ListXattrOp struct {
	// The inode whose extended attributes we are listing.
	Inode InodeID

	// The destination buffer. If it is too small to hold the list, the
	// file system should return ERANGE.
	//
	// The output data should consist of a sequence of NUL-terminated strings,
	// one for each xattr.
	Dst []byte

	// Set by the file system: the number of bytes read into Dst, or
	// the number of bytes that would have been read into Dst if Dst were
	// big enough (return ERANGE in this case).
	BytesRead int
}
// Set an extended attribute.
//
// This is sent in response to setxattr(2). The file system should return
// ENOSPC if there is insufficient space remaining to store the extended
// attribute.
type SetXattrOp struct {
	// The inode whose extended attribute we are setting.
	Inode InodeID

	// The name of the extended attribute.
	Name string

	// The value for the extended attribute.
	Value []byte

	// If Flags is 0x1, and the attribute exists already, EEXIST should be returned.
	// If Flags is 0x2, and the attribute does not exist, ENOATTR should be returned.
	// If Flags is 0x0, the extended attribute will be created if need be, or will
	// simply replace the value if the attribute exists.
	//
	// NOTE(review): 0x1 and 0x2 look like Linux's XATTR_CREATE and
	// XATTR_REPLACE from setxattr(2); confirm, and check whether other
	// platforms deliver the same values.
	Flags uint32
}