io.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150
  1. /*
  2. * This file is part of UBIFS.
  3. *
  4. * Copyright (C) 2006-2008 Nokia Corporation.
  5. * Copyright (C) 2006, 2007 University of Szeged, Hungary
  6. *
  7. * SPDX-License-Identifier: GPL-2.0+
  8. *
  9. * Authors: Artem Bityutskiy (Битюцкий Артём)
  10. * Adrian Hunter
  11. * Zoltan Sogor
  12. */
  13. /*
  14. * This file implements UBIFS I/O subsystem which provides various I/O-related
  15. * helper functions (reading/writing/checking/validating nodes) and implements
  16. * write-buffering support. Write buffers help to save space which otherwise
  17. * would have been wasted for padding to the nearest minimal I/O unit boundary.
  18. * Instead, data first goes to the write-buffer and is flushed when the
  19. * buffer is full or when it is not used for some time (by timer). This is
  20. * similar to the mechanism used by JFFS2.
  21. *
  22. * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
  23. * write size (@c->max_write_size). The latter is the maximum amount of bytes
  24. * the underlying flash is able to program at a time, and writing in
  25. * @c->max_write_size units should presumably be faster. Obviously,
  26. * @c->min_io_size <= @c->max_write_size. Write-buffers are of
  27. * @c->max_write_size bytes in size for maximum performance. However, when a
  28. * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
  29. * boundary) which contains data is written, not the whole write-buffer,
  30. * because this is more space-efficient.
  31. *
  32. * This optimization adds few complications to the code. Indeed, on the one
  33. * hand, we want to write in optimal @c->max_write_size bytes chunks, which
  34. * also means aligning writes at the @c->max_write_size bytes offsets. On the
  35. * other hand, we do not want to waste space when synchronizing the write
  36. * buffer, so during synchronization we write in smaller chunks. And this makes
  37. * the next write offset to be not aligned to @c->max_write_size bytes. So we
  38. * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
  39. * to @c->max_write_size bytes again. We do this by temporarily shrinking
  40. * write-buffer size (@wbuf->size).
  41. *
  42. * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
  43. * mutexes defined inside these objects. Since sometimes upper-level code
  44. * has to lock the write-buffer (e.g. journal space reservation code), many
  45. * functions related to write-buffers have "nolock" suffix which means that the
  46. * caller has to lock the write-buffer before calling this function.
  47. *
  48. * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not
  49. * aligned, UBIFS starts the next node from the aligned address, and the padded
  50. * bytes may contain any rubbish. In other words, UBIFS does not put padding
  51. * bytes in those small gaps. Common headers of nodes store real node lengths,
  52. * not aligned lengths. Indexing nodes also store real lengths in branches.
  53. *
  54. * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
  55. * uses padding nodes or padding bytes, if the padding node does not fit.
  56. *
  57. * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
  58. * they are read from the flash media.
  59. */
  60. #ifndef __UBOOT__
  61. #include <linux/crc32.h>
  62. #include <linux/slab.h>
  63. #else
  64. #include <linux/compat.h>
  65. #include <linux/err.h>
  66. #endif
  67. #include "ubifs.h"
  68. /**
  69. * ubifs_ro_mode - switch UBIFS to read read-only mode.
  70. * @c: UBIFS file-system description object
  71. * @err: error code which is the reason of switching to R/O mode
  72. */
  73. void ubifs_ro_mode(struct ubifs_info *c, int err)
  74. {
  75. if (!c->ro_error) {
  76. c->ro_error = 1;
  77. c->no_chk_data_crc = 0;
  78. c->vfs_sb->s_flags |= MS_RDONLY;
  79. ubifs_warn("switched to read-only mode, error %d", err);
  80. dump_stack();
  81. }
  82. }
  83. /*
  84. * Below are simple wrappers over UBI I/O functions which include some
  85. * additional checks and UBIFS debugging stuff. See corresponding UBI function
  86. * for more information.
  87. */
  88. int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
  89. int len, int even_ebadmsg)
  90. {
  91. int err;
  92. err = ubi_read(c->ubi, lnum, buf, offs, len);
  93. /*
  94. * In case of %-EBADMSG print the error message only if the
  95. * @even_ebadmsg is true.
  96. */
  97. if (err && (err != -EBADMSG || even_ebadmsg)) {
  98. ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
  99. len, lnum, offs, err);
  100. dump_stack();
  101. }
  102. return err;
  103. }
  104. int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
  105. int len)
  106. {
  107. int err;
  108. ubifs_assert(!c->ro_media && !c->ro_mount);
  109. if (c->ro_error)
  110. return -EROFS;
  111. if (!dbg_is_tst_rcvry(c))
  112. err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
  113. else
  114. err = dbg_leb_write(c, lnum, buf, offs, len);
  115. if (err) {
  116. ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
  117. len, lnum, offs, err);
  118. ubifs_ro_mode(c, err);
  119. dump_stack();
  120. }
  121. return err;
  122. }
  123. int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
  124. {
  125. int err;
  126. ubifs_assert(!c->ro_media && !c->ro_mount);
  127. if (c->ro_error)
  128. return -EROFS;
  129. if (!dbg_is_tst_rcvry(c))
  130. err = ubi_leb_change(c->ubi, lnum, buf, len);
  131. else
  132. err = dbg_leb_change(c, lnum, buf, len);
  133. if (err) {
  134. ubifs_err("changing %d bytes in LEB %d failed, error %d",
  135. len, lnum, err);
  136. ubifs_ro_mode(c, err);
  137. dump_stack();
  138. }
  139. return err;
  140. }
  141. int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
  142. {
  143. int err;
  144. ubifs_assert(!c->ro_media && !c->ro_mount);
  145. if (c->ro_error)
  146. return -EROFS;
  147. if (!dbg_is_tst_rcvry(c))
  148. err = ubi_leb_unmap(c->ubi, lnum);
  149. else
  150. err = dbg_leb_unmap(c, lnum);
  151. if (err) {
  152. ubifs_err("unmap LEB %d failed, error %d", lnum, err);
  153. ubifs_ro_mode(c, err);
  154. dump_stack();
  155. }
  156. return err;
  157. }
  158. int ubifs_leb_map(struct ubifs_info *c, int lnum)
  159. {
  160. int err;
  161. ubifs_assert(!c->ro_media && !c->ro_mount);
  162. if (c->ro_error)
  163. return -EROFS;
  164. if (!dbg_is_tst_rcvry(c))
  165. err = ubi_leb_map(c->ubi, lnum);
  166. else
  167. err = dbg_leb_map(c, lnum);
  168. if (err) {
  169. ubifs_err("mapping LEB %d failed, error %d", lnum, err);
  170. ubifs_ro_mode(c, err);
  171. dump_stack();
  172. }
  173. return err;
  174. }
  175. int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
  176. {
  177. int err;
  178. err = ubi_is_mapped(c->ubi, lnum);
  179. if (err < 0) {
  180. ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
  181. lnum, err);
  182. dump_stack();
  183. }
  184. return err;
  185. }
  186. /**
  187. * ubifs_check_node - check node.
  188. * @c: UBIFS file-system description object
  189. * @buf: node to check
  190. * @lnum: logical eraseblock number
  191. * @offs: offset within the logical eraseblock
  192. * @quiet: print no messages
  193. * @must_chk_crc: indicates whether to always check the CRC
  194. *
  195. * This function checks node magic number and CRC checksum. This function also
  196. * validates node length to prevent UBIFS from becoming crazy when an attacker
  197. * feeds it a file-system image with incorrect nodes. For example, too large
  198. * node length in the common header could cause UBIFS to read memory outside of
  199. * allocated buffer when checking the CRC checksum.
  200. *
  201. * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
  202. * true, which is controlled by corresponding UBIFS mount option. However, if
  203. * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
  204. * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
  205. * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
  206. * is checked. This is because during mounting or re-mounting from R/O mode to
  207. * R/W mode we may read journal nodes (when replying the journal or doing the
  208. * recovery) and the journal nodes may potentially be corrupted, so checking is
  209. * required.
  210. *
  211. * This function returns zero in case of success and %-EUCLEAN in case of bad
  212. * CRC or magic.
  213. */
  214. int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
  215. int offs, int quiet, int must_chk_crc)
  216. {
  217. int err = -EINVAL, type, node_len;
  218. uint32_t crc, node_crc, magic;
  219. const struct ubifs_ch *ch = buf;
  220. ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
  221. ubifs_assert(!(offs & 7) && offs < c->leb_size);
  222. magic = le32_to_cpu(ch->magic);
  223. if (magic != UBIFS_NODE_MAGIC) {
  224. if (!quiet)
  225. ubifs_err("bad magic %#08x, expected %#08x",
  226. magic, UBIFS_NODE_MAGIC);
  227. err = -EUCLEAN;
  228. goto out;
  229. }
  230. type = ch->node_type;
  231. if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
  232. if (!quiet)
  233. ubifs_err("bad node type %d", type);
  234. goto out;
  235. }
  236. node_len = le32_to_cpu(ch->len);
  237. if (node_len + offs > c->leb_size)
  238. goto out_len;
  239. if (c->ranges[type].max_len == 0) {
  240. if (node_len != c->ranges[type].len)
  241. goto out_len;
  242. } else if (node_len < c->ranges[type].min_len ||
  243. node_len > c->ranges[type].max_len)
  244. goto out_len;
  245. if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
  246. !c->remounting_rw && c->no_chk_data_crc)
  247. return 0;
  248. crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
  249. node_crc = le32_to_cpu(ch->crc);
  250. if (crc != node_crc) {
  251. if (!quiet)
  252. ubifs_err("bad CRC: calculated %#08x, read %#08x",
  253. crc, node_crc);
  254. err = -EUCLEAN;
  255. goto out;
  256. }
  257. return 0;
  258. out_len:
  259. if (!quiet)
  260. ubifs_err("bad node length %d", node_len);
  261. out:
  262. if (!quiet) {
  263. ubifs_err("bad node at LEB %d:%d", lnum, offs);
  264. ubifs_dump_node(c, buf);
  265. dump_stack();
  266. }
  267. return err;
  268. }
  269. /**
  270. * ubifs_pad - pad flash space.
  271. * @c: UBIFS file-system description object
  272. * @buf: buffer to put padding to
  273. * @pad: how many bytes to pad
  274. *
  275. * The flash media obliges us to write only in chunks of %c->min_io_size and
  276. * when we have to write less data we add padding node to the write-buffer and
  277. * pad it to the next minimal I/O unit's boundary. Padding nodes help when the
  278. * media is being scanned. If the amount of wasted space is not enough to fit a
  279. * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes
  280. * pattern (%UBIFS_PADDING_BYTE).
  281. *
  282. * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
  283. * used.
  284. */
  285. void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
  286. {
  287. uint32_t crc;
  288. ubifs_assert(pad >= 0 && !(pad & 7));
  289. if (pad >= UBIFS_PAD_NODE_SZ) {
  290. struct ubifs_ch *ch = buf;
  291. struct ubifs_pad_node *pad_node = buf;
  292. ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
  293. ch->node_type = UBIFS_PAD_NODE;
  294. ch->group_type = UBIFS_NO_NODE_GROUP;
  295. ch->padding[0] = ch->padding[1] = 0;
  296. ch->sqnum = 0;
  297. ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
  298. pad -= UBIFS_PAD_NODE_SZ;
  299. pad_node->pad_len = cpu_to_le32(pad);
  300. crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
  301. ch->crc = cpu_to_le32(crc);
  302. memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
  303. } else if (pad > 0)
  304. /* Too little space, padding node won't fit */
  305. memset(buf, UBIFS_PADDING_BYTE, pad);
  306. }
  307. /**
  308. * next_sqnum - get next sequence number.
  309. * @c: UBIFS file-system description object
  310. */
  311. static unsigned long long next_sqnum(struct ubifs_info *c)
  312. {
  313. unsigned long long sqnum;
  314. spin_lock(&c->cnt_lock);
  315. sqnum = ++c->max_sqnum;
  316. spin_unlock(&c->cnt_lock);
  317. if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
  318. if (sqnum >= SQNUM_WATERMARK) {
  319. ubifs_err("sequence number overflow %llu, end of life",
  320. sqnum);
  321. ubifs_ro_mode(c, -EINVAL);
  322. }
  323. ubifs_warn("running out of sequence numbers, end of life soon");
  324. }
  325. return sqnum;
  326. }
  327. /**
  328. * ubifs_prepare_node - prepare node to be written to flash.
  329. * @c: UBIFS file-system description object
  330. * @node: the node to pad
  331. * @len: node length
  332. * @pad: if the buffer has to be padded
  333. *
  334. * This function prepares node at @node to be written to the media - it
  335. * calculates node CRC, fills the common header, and adds proper padding up to
  336. * the next minimum I/O unit if @pad is not zero.
  337. */
  338. void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
  339. {
  340. uint32_t crc;
  341. struct ubifs_ch *ch = node;
  342. unsigned long long sqnum = next_sqnum(c);
  343. ubifs_assert(len >= UBIFS_CH_SZ);
  344. ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
  345. ch->len = cpu_to_le32(len);
  346. ch->group_type = UBIFS_NO_NODE_GROUP;
  347. ch->sqnum = cpu_to_le64(sqnum);
  348. ch->padding[0] = ch->padding[1] = 0;
  349. crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
  350. ch->crc = cpu_to_le32(crc);
  351. if (pad) {
  352. len = ALIGN(len, 8);
  353. pad = ALIGN(len, c->min_io_size) - len;
  354. ubifs_pad(c, node + len, pad);
  355. }
  356. }
  357. /**
  358. * ubifs_prep_grp_node - prepare node of a group to be written to flash.
  359. * @c: UBIFS file-system description object
  360. * @node: the node to pad
  361. * @len: node length
  362. * @last: indicates the last node of the group
  363. *
  364. * This function prepares node at @node to be written to the media - it
  365. * calculates node CRC and fills the common header.
  366. */
  367. void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
  368. {
  369. uint32_t crc;
  370. struct ubifs_ch *ch = node;
  371. unsigned long long sqnum = next_sqnum(c);
  372. ubifs_assert(len >= UBIFS_CH_SZ);
  373. ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
  374. ch->len = cpu_to_le32(len);
  375. if (last)
  376. ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
  377. else
  378. ch->group_type = UBIFS_IN_NODE_GROUP;
  379. ch->sqnum = cpu_to_le64(sqnum);
  380. ch->padding[0] = ch->padding[1] = 0;
  381. crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
  382. ch->crc = cpu_to_le32(crc);
  383. }
  384. #ifndef __UBOOT__
  385. /**
  386. * wbuf_timer_callback - write-buffer timer callback function.
  387. * @data: timer data (write-buffer descriptor)
  388. *
  389. * This function is called when the write-buffer timer expires.
  390. */
  391. static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
  392. {
  393. struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
  394. dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
  395. wbuf->need_sync = 1;
  396. wbuf->c->need_wbuf_sync = 1;
  397. ubifs_wake_up_bgt(wbuf->c);
  398. return HRTIMER_NORESTART;
  399. }
  400. /**
  401. * new_wbuf_timer - start new write-buffer timer.
  402. * @wbuf: write-buffer descriptor
  403. */
  404. static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
  405. {
  406. ubifs_assert(!hrtimer_active(&wbuf->timer));
  407. if (wbuf->no_timer)
  408. return;
  409. dbg_io("set timer for jhead %s, %llu-%llu millisecs",
  410. dbg_jhead(wbuf->jhead),
  411. div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
  412. div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
  413. USEC_PER_SEC));
  414. hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
  415. HRTIMER_MODE_REL);
  416. }
  417. #endif
  418. /**
  419. * cancel_wbuf_timer - cancel write-buffer timer.
  420. * @wbuf: write-buffer descriptor
  421. */
  422. static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
  423. {
  424. if (wbuf->no_timer)
  425. return;
  426. wbuf->need_sync = 0;
  427. #ifndef __UBOOT__
  428. hrtimer_cancel(&wbuf->timer);
  429. #endif
  430. }
  431. /**
  432. * ubifs_wbuf_sync_nolock - synchronize write-buffer.
  433. * @wbuf: write-buffer to synchronize
  434. *
  435. * This function synchronizes write-buffer @buf and returns zero in case of
  436. * success or a negative error code in case of failure.
  437. *
  438. * Note, although write-buffers are of @c->max_write_size, this function does
  439. * not necessarily writes all @c->max_write_size bytes to the flash. Instead,
  440. * if the write-buffer is only partially filled with data, only the used part
  441. * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
  442. * This way we waste less space.
  443. */
  444. int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
  445. {
  446. struct ubifs_info *c = wbuf->c;
  447. int err, dirt, sync_len;
  448. cancel_wbuf_timer_nolock(wbuf);
  449. if (!wbuf->used || wbuf->lnum == -1)
  450. /* Write-buffer is empty or not seeked */
  451. return 0;
  452. dbg_io("LEB %d:%d, %d bytes, jhead %s",
  453. wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
  454. ubifs_assert(!(wbuf->avail & 7));
  455. ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
  456. ubifs_assert(wbuf->size >= c->min_io_size);
  457. ubifs_assert(wbuf->size <= c->max_write_size);
  458. ubifs_assert(wbuf->size % c->min_io_size == 0);
  459. ubifs_assert(!c->ro_media && !c->ro_mount);
  460. if (c->leb_size - wbuf->offs >= c->max_write_size)
  461. ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
  462. if (c->ro_error)
  463. return -EROFS;
  464. /*
  465. * Do not write whole write buffer but write only the minimum necessary
  466. * amount of min. I/O units.
  467. */
  468. sync_len = ALIGN(wbuf->used, c->min_io_size);
  469. dirt = sync_len - wbuf->used;
  470. if (dirt)
  471. ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
  472. err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len);
  473. if (err)
  474. return err;
  475. spin_lock(&wbuf->lock);
  476. wbuf->offs += sync_len;
  477. /*
  478. * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
  479. * But our goal is to optimize writes and make sure we write in
  480. * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
  481. * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
  482. * sure that @wbuf->offs + @wbuf->size is aligned to
  483. * @c->max_write_size. This way we make sure that after next
  484. * write-buffer flush we are again at the optimal offset (aligned to
  485. * @c->max_write_size).
  486. */
  487. if (c->leb_size - wbuf->offs < c->max_write_size)
  488. wbuf->size = c->leb_size - wbuf->offs;
  489. else if (wbuf->offs & (c->max_write_size - 1))
  490. wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
  491. else
  492. wbuf->size = c->max_write_size;
  493. wbuf->avail = wbuf->size;
  494. wbuf->used = 0;
  495. wbuf->next_ino = 0;
  496. spin_unlock(&wbuf->lock);
  497. if (wbuf->sync_callback)
  498. err = wbuf->sync_callback(c, wbuf->lnum,
  499. c->leb_size - wbuf->offs, dirt);
  500. return err;
  501. }
  502. /**
  503. * ubifs_wbuf_seek_nolock - seek write-buffer.
  504. * @wbuf: write-buffer
  505. * @lnum: logical eraseblock number to seek to
  506. * @offs: logical eraseblock offset to seek to
  507. *
  508. * This function targets the write-buffer to logical eraseblock @lnum:@offs.
  509. * The write-buffer has to be empty. Returns zero in case of success and a
  510. * negative error code in case of failure.
  511. */
  512. int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs)
  513. {
  514. const struct ubifs_info *c = wbuf->c;
  515. dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
  516. ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
  517. ubifs_assert(offs >= 0 && offs <= c->leb_size);
  518. ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
  519. ubifs_assert(lnum != wbuf->lnum);
  520. ubifs_assert(wbuf->used == 0);
  521. spin_lock(&wbuf->lock);
  522. wbuf->lnum = lnum;
  523. wbuf->offs = offs;
  524. if (c->leb_size - wbuf->offs < c->max_write_size)
  525. wbuf->size = c->leb_size - wbuf->offs;
  526. else if (wbuf->offs & (c->max_write_size - 1))
  527. wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
  528. else
  529. wbuf->size = c->max_write_size;
  530. wbuf->avail = wbuf->size;
  531. wbuf->used = 0;
  532. spin_unlock(&wbuf->lock);
  533. return 0;
  534. }
  535. #ifndef __UBOOT__
  536. /**
  537. * ubifs_bg_wbufs_sync - synchronize write-buffers.
  538. * @c: UBIFS file-system description object
  539. *
  540. * This function is called by background thread to synchronize write-buffers.
  541. * Returns zero in case of success and a negative error code in case of
  542. * failure.
  543. */
  544. int ubifs_bg_wbufs_sync(struct ubifs_info *c)
  545. {
  546. int err, i;
  547. ubifs_assert(!c->ro_media && !c->ro_mount);
  548. if (!c->need_wbuf_sync)
  549. return 0;
  550. c->need_wbuf_sync = 0;
  551. if (c->ro_error) {
  552. err = -EROFS;
  553. goto out_timers;
  554. }
  555. dbg_io("synchronize");
  556. for (i = 0; i < c->jhead_cnt; i++) {
  557. struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
  558. cond_resched();
  559. /*
  560. * If the mutex is locked then wbuf is being changed, so
  561. * synchronization is not necessary.
  562. */
  563. if (mutex_is_locked(&wbuf->io_mutex))
  564. continue;
  565. mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
  566. if (!wbuf->need_sync) {
  567. mutex_unlock(&wbuf->io_mutex);
  568. continue;
  569. }
  570. err = ubifs_wbuf_sync_nolock(wbuf);
  571. mutex_unlock(&wbuf->io_mutex);
  572. if (err) {
  573. ubifs_err("cannot sync write-buffer, error %d", err);
  574. ubifs_ro_mode(c, err);
  575. goto out_timers;
  576. }
  577. }
  578. return 0;
  579. out_timers:
  580. /* Cancel all timers to prevent repeated errors */
  581. for (i = 0; i < c->jhead_cnt; i++) {
  582. struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
  583. mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
  584. cancel_wbuf_timer_nolock(wbuf);
  585. mutex_unlock(&wbuf->io_mutex);
  586. }
  587. return err;
  588. }
  589. /**
  590. * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
  591. * @wbuf: write-buffer
  592. * @buf: node to write
  593. * @len: node length
  594. *
  595. * This function writes data to flash via write-buffer @wbuf. This means that
  596. * the last piece of the node won't reach the flash media immediately if it
  597. * does not take whole max. write unit (@c->max_write_size). Instead, the node
  598. * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
  599. * because more data are appended to the write-buffer).
  600. *
  601. * This function returns zero in case of success and a negative error code in
  602. * case of failure. If the node cannot be written because there is no more
  603. * space in this logical eraseblock, %-ENOSPC is returned.
  604. */
  605. int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
  606. {
  607. struct ubifs_info *c = wbuf->c;
  608. int err, written, n, aligned_len = ALIGN(len, 8);
  609. dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
  610. dbg_ntype(((struct ubifs_ch *)buf)->node_type),
  611. dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
  612. ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
  613. ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
  614. ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
  615. ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
  616. ubifs_assert(wbuf->size >= c->min_io_size);
  617. ubifs_assert(wbuf->size <= c->max_write_size);
  618. ubifs_assert(wbuf->size % c->min_io_size == 0);
  619. ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
  620. ubifs_assert(!c->ro_media && !c->ro_mount);
  621. ubifs_assert(!c->space_fixup);
  622. if (c->leb_size - wbuf->offs >= c->max_write_size)
  623. ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
  624. if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
  625. err = -ENOSPC;
  626. goto out;
  627. }
  628. cancel_wbuf_timer_nolock(wbuf);
  629. if (c->ro_error)
  630. return -EROFS;
  631. if (aligned_len <= wbuf->avail) {
  632. /*
  633. * The node is not very large and fits entirely within
  634. * write-buffer.
  635. */
  636. memcpy(wbuf->buf + wbuf->used, buf, len);
  637. if (aligned_len == wbuf->avail) {
  638. dbg_io("flush jhead %s wbuf to LEB %d:%d",
  639. dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
  640. err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf,
  641. wbuf->offs, wbuf->size);
  642. if (err)
  643. goto out;
  644. spin_lock(&wbuf->lock);
  645. wbuf->offs += wbuf->size;
  646. if (c->leb_size - wbuf->offs >= c->max_write_size)
  647. wbuf->size = c->max_write_size;
  648. else
  649. wbuf->size = c->leb_size - wbuf->offs;
  650. wbuf->avail = wbuf->size;
  651. wbuf->used = 0;
  652. wbuf->next_ino = 0;
  653. spin_unlock(&wbuf->lock);
  654. } else {
  655. spin_lock(&wbuf->lock);
  656. wbuf->avail -= aligned_len;
  657. wbuf->used += aligned_len;
  658. spin_unlock(&wbuf->lock);
  659. }
  660. goto exit;
  661. }
  662. written = 0;
  663. if (wbuf->used) {
  664. /*
  665. * The node is large enough and does not fit entirely within
  666. * current available space. We have to fill and flush
  667. * write-buffer and switch to the next max. write unit.
  668. */
  669. dbg_io("flush jhead %s wbuf to LEB %d:%d",
  670. dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
  671. memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
  672. err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs,
  673. wbuf->size);
  674. if (err)
  675. goto out;
  676. wbuf->offs += wbuf->size;
  677. len -= wbuf->avail;
  678. aligned_len -= wbuf->avail;
  679. written += wbuf->avail;
  680. } else if (wbuf->offs & (c->max_write_size - 1)) {
  681. /*
  682. * The write-buffer offset is not aligned to
  683. * @c->max_write_size and @wbuf->size is less than
  684. * @c->max_write_size. Write @wbuf->size bytes to make sure the
  685. * following writes are done in optimal @c->max_write_size
  686. * chunks.
  687. */
  688. dbg_io("write %d bytes to LEB %d:%d",
  689. wbuf->size, wbuf->lnum, wbuf->offs);
  690. err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs,
  691. wbuf->size);
  692. if (err)
  693. goto out;
  694. wbuf->offs += wbuf->size;
  695. len -= wbuf->size;
  696. aligned_len -= wbuf->size;
  697. written += wbuf->size;
  698. }
  699. /*
  700. * The remaining data may take more whole max. write units, so write the
  701. * remains multiple to max. write unit size directly to the flash media.
  702. * We align node length to 8-byte boundary because we anyway flash wbuf
  703. * if the remaining space is less than 8 bytes.
  704. */
  705. n = aligned_len >> c->max_write_shift;
  706. if (n) {
  707. n <<= c->max_write_shift;
  708. dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
  709. wbuf->offs);
  710. err = ubifs_leb_write(c, wbuf->lnum, buf + written,
  711. wbuf->offs, n);
  712. if (err)
  713. goto out;
  714. wbuf->offs += n;
  715. aligned_len -= n;
  716. len -= n;
  717. written += n;
  718. }
  719. spin_lock(&wbuf->lock);
  720. if (aligned_len)
  721. /*
  722. * And now we have what's left and what does not take whole
  723. * max. write unit, so write it to the write-buffer and we are
  724. * done.
  725. */
  726. memcpy(wbuf->buf, buf + written, len);
  727. if (c->leb_size - wbuf->offs >= c->max_write_size)
  728. wbuf->size = c->max_write_size;
  729. else
  730. wbuf->size = c->leb_size - wbuf->offs;
  731. wbuf->avail = wbuf->size - aligned_len;
  732. wbuf->used = aligned_len;
  733. wbuf->next_ino = 0;
  734. spin_unlock(&wbuf->lock);
  735. exit:
  736. if (wbuf->sync_callback) {
  737. int free = c->leb_size - wbuf->offs - wbuf->used;
  738. err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
  739. if (err)
  740. goto out;
  741. }
  742. if (wbuf->used)
  743. new_wbuf_timer_nolock(wbuf);
  744. return 0;
  745. out:
  746. ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
  747. len, wbuf->lnum, wbuf->offs, err);
  748. ubifs_dump_node(c, buf);
  749. dump_stack();
  750. ubifs_dump_leb(c, wbuf->lnum);
  751. return err;
  752. }
  753. /**
  754. * ubifs_write_node - write node to the media.
  755. * @c: UBIFS file-system description object
  756. * @buf: the node to write
  757. * @len: node length
  758. * @lnum: logical eraseblock number
  759. * @offs: offset within the logical eraseblock
  760. *
  761. * This function automatically fills node magic number, assigns sequence
  762. * number, and calculates node CRC checksum. The length of the @buf buffer has
  763. * to be aligned to the minimal I/O unit size. This function automatically
  764. * appends padding node and padding bytes if needed. Returns zero in case of
  765. * success and a negative error code in case of failure.
  766. */
  767. int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
  768. int offs)
  769. {
  770. int err, buf_len = ALIGN(len, c->min_io_size);
  771. dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
  772. lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
  773. buf_len);
  774. ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
  775. ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
  776. ubifs_assert(!c->ro_media && !c->ro_mount);
  777. ubifs_assert(!c->space_fixup);
  778. if (c->ro_error)
  779. return -EROFS;
  780. ubifs_prepare_node(c, buf, len, 1);
  781. err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
  782. if (err)
  783. ubifs_dump_node(c, buf);
  784. return err;
  785. }
  786. #endif
  787. /**
  788. * ubifs_read_node_wbuf - read node from the media or write-buffer.
  789. * @wbuf: wbuf to check for un-written data
  790. * @buf: buffer to read to
  791. * @type: node type
  792. * @len: node length
  793. * @lnum: logical eraseblock number
  794. * @offs: offset within the logical eraseblock
  795. *
  796. * This function reads a node of known type and length, checks it and stores
  797. * in @buf. If the node partially or fully sits in the write-buffer, this
  798. * function takes data from the buffer, otherwise it reads the flash media.
  799. * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative
  800. * error code in case of failure.
  801. */
  802. int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
  803. int lnum, int offs)
  804. {
  805. const struct ubifs_info *c = wbuf->c;
  806. int err, rlen, overlap;
  807. struct ubifs_ch *ch = buf;
  808. dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
  809. dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
  810. ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
  811. ubifs_assert(!(offs & 7) && offs < c->leb_size);
  812. ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
  813. spin_lock(&wbuf->lock);
  814. overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
  815. if (!overlap) {
  816. /* We may safely unlock the write-buffer and read the data */
  817. spin_unlock(&wbuf->lock);
  818. return ubifs_read_node(c, buf, type, len, lnum, offs);
  819. }
  820. /* Don't read under wbuf */
  821. rlen = wbuf->offs - offs;
  822. if (rlen < 0)
  823. rlen = 0;
  824. /* Copy the rest from the write-buffer */
  825. memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
  826. spin_unlock(&wbuf->lock);
  827. if (rlen > 0) {
  828. /* Read everything that goes before write-buffer */
  829. err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
  830. if (err && err != -EBADMSG)
  831. return err;
  832. }
  833. if (type != ch->node_type) {
  834. ubifs_err("bad node type (%d but expected %d)",
  835. ch->node_type, type);
  836. goto out;
  837. }
  838. err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
  839. if (err) {
  840. ubifs_err("expected node type %d", type);
  841. return err;
  842. }
  843. rlen = le32_to_cpu(ch->len);
  844. if (rlen != len) {
  845. ubifs_err("bad node length %d, expected %d", rlen, len);
  846. goto out;
  847. }
  848. return 0;
  849. out:
  850. ubifs_err("bad node at LEB %d:%d", lnum, offs);
  851. ubifs_dump_node(c, buf);
  852. dump_stack();
  853. return -EINVAL;
  854. }
  855. /**
  856. * ubifs_read_node - read node.
  857. * @c: UBIFS file-system description object
  858. * @buf: buffer to read to
  859. * @type: node type
  860. * @len: node length (not aligned)
  861. * @lnum: logical eraseblock number
  862. * @offs: offset within the logical eraseblock
  863. *
  864. * This function reads a node of known type and and length, checks it and
  865. * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched
  866. * and a negative error code in case of failure.
  867. */
  868. int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
  869. int lnum, int offs)
  870. {
  871. int err, l;
  872. struct ubifs_ch *ch = buf;
  873. dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
  874. ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
  875. ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
  876. ubifs_assert(!(offs & 7) && offs < c->leb_size);
  877. ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
  878. err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
  879. if (err && err != -EBADMSG)
  880. return err;
  881. if (type != ch->node_type) {
  882. ubifs_err("bad node type (%d but expected %d)",
  883. ch->node_type, type);
  884. goto out;
  885. }
  886. err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
  887. if (err) {
  888. ubifs_err("expected node type %d", type);
  889. return err;
  890. }
  891. l = le32_to_cpu(ch->len);
  892. if (l != len) {
  893. ubifs_err("bad node length %d, expected %d", l, len);
  894. goto out;
  895. }
  896. return 0;
  897. out:
  898. ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
  899. ubi_is_mapped(c->ubi, lnum));
  900. ubifs_dump_node(c, buf);
  901. dump_stack();
  902. return -EINVAL;
  903. }
  904. /**
  905. * ubifs_wbuf_init - initialize write-buffer.
  906. * @c: UBIFS file-system description object
  907. * @wbuf: write-buffer to initialize
  908. *
  909. * This function initializes write-buffer. Returns zero in case of success
  910. * %-ENOMEM in case of failure.
  911. */
  912. int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
  913. {
  914. size_t size;
  915. wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
  916. if (!wbuf->buf)
  917. return -ENOMEM;
  918. size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
  919. wbuf->inodes = kmalloc(size, GFP_KERNEL);
  920. if (!wbuf->inodes) {
  921. kfree(wbuf->buf);
  922. wbuf->buf = NULL;
  923. return -ENOMEM;
  924. }
  925. wbuf->used = 0;
  926. wbuf->lnum = wbuf->offs = -1;
  927. /*
  928. * If the LEB starts at the max. write size aligned address, then
  929. * write-buffer size has to be set to @c->max_write_size. Otherwise,
  930. * set it to something smaller so that it ends at the closest max.
  931. * write size boundary.
  932. */
  933. size = c->max_write_size - (c->leb_start % c->max_write_size);
  934. wbuf->avail = wbuf->size = size;
  935. wbuf->sync_callback = NULL;
  936. mutex_init(&wbuf->io_mutex);
  937. spin_lock_init(&wbuf->lock);
  938. wbuf->c = c;
  939. wbuf->next_ino = 0;
  940. #ifndef __UBOOT__
  941. hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  942. wbuf->timer.function = wbuf_timer_callback_nolock;
  943. wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
  944. wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
  945. wbuf->delta *= 1000000000ULL;
  946. ubifs_assert(wbuf->delta <= ULONG_MAX);
  947. #endif
  948. return 0;
  949. }
  950. /**
  951. * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
  952. * @wbuf: the write-buffer where to add
  953. * @inum: the inode number
  954. *
  955. * This function adds an inode number to the inode array of the write-buffer.
  956. */
  957. void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
  958. {
  959. if (!wbuf->buf)
  960. /* NOR flash or something similar */
  961. return;
  962. spin_lock(&wbuf->lock);
  963. if (wbuf->used)
  964. wbuf->inodes[wbuf->next_ino++] = inum;
  965. spin_unlock(&wbuf->lock);
  966. }
  967. /**
  968. * wbuf_has_ino - returns if the wbuf contains data from the inode.
  969. * @wbuf: the write-buffer
  970. * @inum: the inode number
  971. *
  972. * This function returns with %1 if the write-buffer contains some data from the
  973. * given inode otherwise it returns with %0.
  974. */
  975. static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
  976. {
  977. int i, ret = 0;
  978. spin_lock(&wbuf->lock);
  979. for (i = 0; i < wbuf->next_ino; i++)
  980. if (inum == wbuf->inodes[i]) {
  981. ret = 1;
  982. break;
  983. }
  984. spin_unlock(&wbuf->lock);
  985. return ret;
  986. }
  987. /**
  988. * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
  989. * @c: UBIFS file-system description object
  990. * @inode: inode to synchronize
  991. *
  992. * This function synchronizes write-buffers which contain nodes belonging to
  993. * @inode. Returns zero in case of success and a negative error code in case of
  994. * failure.
  995. */
  996. int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
  997. {
  998. int i, err = 0;
  999. for (i = 0; i < c->jhead_cnt; i++) {
  1000. struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
  1001. if (i == GCHD)
  1002. /*
  1003. * GC head is special, do not look at it. Even if the
  1004. * head contains something related to this inode, it is
  1005. * a _copy_ of corresponding on-flash node which sits
  1006. * somewhere else.
  1007. */
  1008. continue;
  1009. if (!wbuf_has_ino(wbuf, inode->i_ino))
  1010. continue;
  1011. mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
  1012. if (wbuf_has_ino(wbuf, inode->i_ino))
  1013. err = ubifs_wbuf_sync_nolock(wbuf);
  1014. mutex_unlock(&wbuf->io_mutex);
  1015. if (err) {
  1016. ubifs_ro_mode(c, err);
  1017. return err;
  1018. }
  1019. }
  1020. return 0;
  1021. }