hashtable.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731
  1. /*
  2. * This implementation is based on code from uClibc-0.9.30.3 but was
  3. * modified and extended for use within U-Boot.
  4. *
  5. * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
  6. *
  7. * Original license header:
  8. *
  9. * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
  10. * This file is part of the GNU C Library.
  11. * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
  12. *
  13. * The GNU C Library is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * The GNU C Library is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with the GNU C Library; if not, write to the Free
  25. * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  26. * 02111-1307 USA.
  27. */
  28. #include <errno.h>
  29. #include <malloc.h>
  30. #ifdef USE_HOSTCC /* HOST build */
  31. # include <string.h>
  32. # include <assert.h>
  33. # ifndef debug
  34. # ifdef DEBUG
  35. # define debug(fmt,args...) printf(fmt ,##args)
  36. # else
  37. # define debug(fmt,args...)
  38. # endif
  39. # endif
  40. #else /* U-Boot build */
  41. # include <common.h>
  42. # include <linux/string.h>
  43. #endif
  44. #ifndef CONFIG_ENV_MIN_ENTRIES /* minimum number of entries */
  45. #define CONFIG_ENV_MIN_ENTRIES 64
  46. #endif
  47. #ifndef CONFIG_ENV_MAX_ENTRIES /* maximum number of entries */
  48. #define CONFIG_ENV_MAX_ENTRIES 512
  49. #endif
  50. #include "search.h"
  51. /*
  52. * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
  53. * [Knuth] The Art of Computer Programming, part 3 (6.4)
  54. */
  55. /*
  56. * The reentrant version has no static variables to maintain the state.
  57. * Instead the interface of all functions is extended to take an argument
  58. * which describes the current status.
  59. */
  60. typedef struct _ENTRY {
  61. int used;
  62. ENTRY entry;
  63. } _ENTRY;
  64. /*
  65. * hcreate()
  66. */
  67. /*
  68. * For the used double hash method the table size has to be a prime. To
  69. * correct the user given table size we need a prime test. This trivial
  70. * algorithm is adequate because
  71. * a) the code is (most probably) called a few times per program run and
  72. * b) the number is small because the table must fit in the core
  73. * */
  /*
   * Trial-division primality test used to pick the hash table size.
   *
   * Only odd numbers are expected (callers force the low bit), so only odd
   * divisors are tried. The bound is written as "div <= number / div"
   * rather than "div * div <= number" so that it cannot overflow for
   * values close to UINT_MAX, and so that 3 is correctly reported as prime.
   *
   * Returns 1 if no odd divisor in [3, sqrt(number)] divides number,
   * 0 otherwise.
   */
  static int isprime(unsigned int number)
  {
      unsigned int div;

      /* no even number will be passed */
      for (div = 3; div <= number / div; div += 2) {
          if (number % div == 0)
              return 0;
      }
      return 1;
  }
  82. /*
  83. * Before using the hash table we must allocate memory for it.
  84. * Test for an existing table are done. We allocate one element
  85. * more as the found prime number says. This is done for more effective
  86. * indexing as explained in the comment for the hsearch function.
  87. * The contents of the table is zeroed, especially the field used
  88. * becomes zero.
  89. */
  90. int hcreate_r(size_t nel, struct hsearch_data *htab)
  91. {
  92. /* Test for correct arguments. */
  93. if (htab == NULL) {
  94. __set_errno(EINVAL);
  95. return 0;
  96. }
  97. /* There is still another table active. Return with error. */
  98. if (htab->table != NULL)
  99. return 0;
  100. /* Change nel to the first prime number not smaller as nel. */
  101. nel |= 1; /* make odd */
  102. while (!isprime(nel))
  103. nel += 2;
  104. htab->size = nel;
  105. htab->filled = 0;
  106. /* allocate memory and zero out */
  107. htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
  108. if (htab->table == NULL)
  109. return 0;
  110. /* everything went alright */
  111. return 1;
  112. }
  113. /*
  114. * hdestroy()
  115. */
  116. /*
  117. * After using the hash table it has to be destroyed. The used memory can
  118. * be freed and the local static variable can be marked as not used.
  119. */
  120. void hdestroy_r(struct hsearch_data *htab)
  121. {
  122. int i;
  123. /* Test for correct arguments. */
  124. if (htab == NULL) {
  125. __set_errno(EINVAL);
  126. return;
  127. }
  128. /* free used memory */
  129. for (i = 1; i <= htab->size; ++i) {
  130. if (htab->table[i].used > 0) {
  131. ENTRY *ep = &htab->table[i].entry;
  132. free(ep->key);
  133. free(ep->data);
  134. }
  135. }
  136. free(htab->table);
  137. /* the sign for an existing table is an value != NULL in htable */
  138. htab->table = NULL;
  139. }
  140. /*
  141. * hsearch()
  142. */
  143. /*
  144. * This is the search function. It uses double hashing with open addressing.
  145. * The argument item.key has to be a pointer to a zero-terminated object,
  146. * most probably a string of chars. The function for generating a number of the
  147. * strings is simple but fast. It can be replaced by a more complex function
  148. * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
  149. *
  150. * We use an trick to speed up the lookup. The table is created by hcreate
  151. * with one more element available. This enables us to use the index zero
  152. * special. This index will never be used because we store the first hash
  153. * index in the field used where zero means not used. Every other value
  154. * means used. The used field can be used as a first fast comparison for
  155. * equality of the stored and the parameter value. This helps to prevent
  156. * unnecessary expensive calls of strcmp.
  157. *
  158. * This implementation differs from the standard library version of
  159. * this function in a number of ways:
  160. *
  161. * - While the standard version does not make any assumptions about
  162. * the type of the stored data objects at all, this implementation
  163. * works with NUL terminated strings only.
  164. * - Instead of storing just pointers to the original objects, we
  165. * create local copies so the caller does not need to care about the
  166. * data any more.
  167. * - The standard implementation does not provide a way to update an
  168. * existing entry. This version will create a new entry or update an
  169. * existing one when both "action == ENTER" and "item.data != NULL".
  170. * - Instead of returning 1 on success, we return the index into the
  171. * internal hash table, which is also guaranteed to be positive.
  172. * This allows us direct access to the found hash table slot for
  173. * example for functions like hdelete().
  174. */
  175. int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
  176. struct hsearch_data *htab)
  177. {
  178. unsigned int idx;
  179. size_t key_len = strlen(match);
  180. for (idx = last_idx + 1; idx < htab->size; ++idx) {
  181. if (htab->table[idx].used <= 0)
  182. continue;
  183. if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
  184. *retval = &htab->table[idx].entry;
  185. return idx;
  186. }
  187. }
  188. __set_errno(ESRCH);
  189. *retval = NULL;
  190. return 0;
  191. }
  192. int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
  193. struct hsearch_data *htab)
  194. {
  195. unsigned int hval;
  196. unsigned int count;
  197. unsigned int len = strlen(item.key);
  198. unsigned int idx;
  199. unsigned int first_deleted = 0;
  200. /* Compute an value for the given string. Perhaps use a better method. */
  201. hval = len;
  202. count = len;
  203. while (count-- > 0) {
  204. hval <<= 4;
  205. hval += item.key[count];
  206. }
  207. /*
  208. * First hash function:
  209. * simply take the modul but prevent zero.
  210. */
  211. hval %= htab->size;
  212. if (hval == 0)
  213. ++hval;
  214. /* The first index tried. */
  215. idx = hval;
  216. if (htab->table[idx].used) {
  217. /*
  218. * Further action might be required according to the
  219. * action value.
  220. */
  221. unsigned hval2;
  222. if (htab->table[idx].used == -1
  223. && !first_deleted)
  224. first_deleted = idx;
  225. if (htab->table[idx].used == hval
  226. && strcmp(item.key, htab->table[idx].entry.key) == 0) {
  227. /* Overwrite existing value? */
  228. if ((action == ENTER) && (item.data != NULL)) {
  229. free(htab->table[idx].entry.data);
  230. htab->table[idx].entry.data =
  231. strdup(item.data);
  232. if (!htab->table[idx].entry.data) {
  233. __set_errno(ENOMEM);
  234. *retval = NULL;
  235. return 0;
  236. }
  237. }
  238. /* return found entry */
  239. *retval = &htab->table[idx].entry;
  240. return idx;
  241. }
  242. /*
  243. * Second hash function:
  244. * as suggested in [Knuth]
  245. */
  246. hval2 = 1 + hval % (htab->size - 2);
  247. do {
  248. /*
  249. * Because SIZE is prime this guarantees to
  250. * step through all available indices.
  251. */
  252. if (idx <= hval2)
  253. idx = htab->size + idx - hval2;
  254. else
  255. idx -= hval2;
  256. /*
  257. * If we visited all entries leave the loop
  258. * unsuccessfully.
  259. */
  260. if (idx == hval)
  261. break;
  262. /* If entry is found use it. */
  263. if ((htab->table[idx].used == hval)
  264. && strcmp(item.key, htab->table[idx].entry.key) == 0) {
  265. /* Overwrite existing value? */
  266. if ((action == ENTER) && (item.data != NULL)) {
  267. free(htab->table[idx].entry.data);
  268. htab->table[idx].entry.data =
  269. strdup(item.data);
  270. if (!htab->table[idx].entry.data) {
  271. __set_errno(ENOMEM);
  272. *retval = NULL;
  273. return 0;
  274. }
  275. }
  276. /* return found entry */
  277. *retval = &htab->table[idx].entry;
  278. return idx;
  279. }
  280. }
  281. while (htab->table[idx].used);
  282. }
  283. /* An empty bucket has been found. */
  284. if (action == ENTER) {
  285. /*
  286. * If table is full and another entry should be
  287. * entered return with error.
  288. */
  289. if (htab->filled == htab->size) {
  290. __set_errno(ENOMEM);
  291. *retval = NULL;
  292. return 0;
  293. }
  294. /*
  295. * Create new entry;
  296. * create copies of item.key and item.data
  297. */
  298. if (first_deleted)
  299. idx = first_deleted;
  300. htab->table[idx].used = hval;
  301. htab->table[idx].entry.key = strdup(item.key);
  302. htab->table[idx].entry.data = strdup(item.data);
  303. if (!htab->table[idx].entry.key ||
  304. !htab->table[idx].entry.data) {
  305. __set_errno(ENOMEM);
  306. *retval = NULL;
  307. return 0;
  308. }
  309. ++htab->filled;
  310. /* return new entry */
  311. *retval = &htab->table[idx].entry;
  312. return 1;
  313. }
  314. __set_errno(ESRCH);
  315. *retval = NULL;
  316. return 0;
  317. }
  318. /*
  319. * hdelete()
  320. */
  321. /*
  322. * The standard implementation of hsearch(3) does not provide any way
  323. * to delete any entries from the hash table. We extend the code to
  324. * do that.
  325. */
  326. int hdelete_r(const char *key, struct hsearch_data *htab)
  327. {
  328. ENTRY e, *ep;
  329. int idx;
  330. debug("hdelete: DELETE key \"%s\"\n", key);
  331. e.key = (char *)key;
  332. if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
  333. __set_errno(ESRCH);
  334. return 0; /* not found */
  335. }
  336. /* free used ENTRY */
  337. debug("hdelete: DELETING key \"%s\"\n", key);
  338. free(ep->key);
  339. free(ep->data);
  340. htab->table[idx].used = -1;
  341. --htab->filled;
  342. return 1;
  343. }
  344. /*
  345. * hexport()
  346. */
  347. /*
  348. * Export the data stored in the hash table in linearized form.
  349. *
  350. * Entries are exported as "name=value" strings, separated by an
  351. * arbitrary (non-NUL, of course) separator character. This allows to
  352. * use this function both when formatting the U-Boot environment for
  353. * external storage (using '\0' as separator), but also when using it
  354. * for the "printenv" command to print all variables, simply by using
  355. * '\n' as separator. This can also be used for new features like
  356. * exporting the environment data as text file, including the option
  357. * for later re-import.
  358. *
  359. * The entries in the result list will be sorted by ascending key
  360. * values.
  361. *
  362. * If the separator character is different from NUL, then any
  363. * separator characters and backslash characters in the values will
  364. * be escaped by a preceding backslash in output. This is needed for
  365. * example to enable multi-line values, especially when the output
  366. * shall later be parsed (for example, for re-import).
  367. *
  368. * There are several options how the result buffer is handled:
  369. *
  370. * *resp size
  371. * -----------
  372. * NULL 0 A string of sufficient length will be allocated.
  373. * NULL >0 A string of the size given will be
  374. * allocated. An error will be returned if the size is
  375. * not sufficient. Any unused bytes in the string will
  376. * be '\0'-padded.
  377. * !NULL 0 The user-supplied buffer will be used. No length
  378. * checking will be performed, i. e. it is assumed that
  379. * the buffer size will always be big enough. DANGEROUS.
  380. * !NULL >0 The user-supplied buffer will be used. An error will
  381. * be returned if the size is not sufficient. Any unused
  382. * bytes in the string will be '\0'-padded.
  383. */
  384. static int cmpkey(const void *p1, const void *p2)
  385. {
  386. ENTRY *e1 = *(ENTRY **) p1;
  387. ENTRY *e2 = *(ENTRY **) p2;
  388. return (strcmp(e1->key, e2->key));
  389. }
  390. ssize_t hexport_r(struct hsearch_data *htab, const char sep,
  391. char **resp, size_t size)
  392. {
  393. ENTRY *list[htab->size];
  394. char *res, *p;
  395. size_t totlen;
  396. int i, n;
  397. /* Test for correct arguments. */
  398. if ((resp == NULL) || (htab == NULL)) {
  399. __set_errno(EINVAL);
  400. return (-1);
  401. }
  402. debug("EXPORT table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
  403. htab, htab->size, htab->filled, size);
  404. /*
  405. * Pass 1:
  406. * search used entries,
  407. * save addresses and compute total length
  408. */
  409. for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
  410. if (htab->table[i].used > 0) {
  411. ENTRY *ep = &htab->table[i].entry;
  412. list[n++] = ep;
  413. totlen += strlen(ep->key) + 2;
  414. if (sep == '\0') {
  415. totlen += strlen(ep->data);
  416. } else { /* check if escapes are needed */
  417. char *s = ep->data;
  418. while (*s) {
  419. ++totlen;
  420. /* add room for needed escape chars */
  421. if ((*s == sep) || (*s == '\\'))
  422. ++totlen;
  423. ++s;
  424. }
  425. }
  426. totlen += 2; /* for '=' and 'sep' char */
  427. }
  428. }
  429. #ifdef DEBUG
  430. /* Pass 1a: print unsorted list */
  431. printf("Unsorted: n=%d\n", n);
  432. for (i = 0; i < n; ++i) {
  433. printf("\t%3d: %p ==> %-10s => %s\n",
  434. i, list[i], list[i]->key, list[i]->data);
  435. }
  436. #endif
  437. /* Sort list by keys */
  438. qsort(list, n, sizeof(ENTRY *), cmpkey);
  439. /* Check if the user supplied buffer size is sufficient */
  440. if (size) {
  441. if (size < totlen + 1) { /* provided buffer too small */
  442. debug("### buffer too small: %d, but need %d\n",
  443. size, totlen + 1);
  444. __set_errno(ENOMEM);
  445. return (-1);
  446. }
  447. } else {
  448. size = totlen + 1;
  449. }
  450. /* Check if the user provided a buffer */
  451. if (*resp) {
  452. /* yes; clear it */
  453. res = *resp;
  454. memset(res, '\0', size);
  455. } else {
  456. /* no, allocate and clear one */
  457. *resp = res = calloc(1, size);
  458. if (res == NULL) {
  459. __set_errno(ENOMEM);
  460. return (-1);
  461. }
  462. }
  463. /*
  464. * Pass 2:
  465. * export sorted list of result data
  466. */
  467. for (i = 0, p = res; i < n; ++i) {
  468. char *s;
  469. s = list[i]->key;
  470. while (*s)
  471. *p++ = *s++;
  472. *p++ = '=';
  473. s = list[i]->data;
  474. while (*s) {
  475. if ((*s == sep) || (*s == '\\'))
  476. *p++ = '\\'; /* escape */
  477. *p++ = *s++;
  478. }
  479. *p++ = sep;
  480. }
  481. *p = '\0'; /* terminate result */
  482. return size;
  483. }
  484. /*
  485. * himport()
  486. */
  487. /*
  488. * Import linearized data into hash table.
  489. *
  490. * This is the inverse function to hexport(): it takes a linear list
  491. * of "name=value" pairs and creates hash table entries from it.
  492. *
  493. * Entries without "value", i. e. consisting of only "name" or
  494. * "name=", will cause this entry to be deleted from the hash table.
  495. *
  496. * The "flag" argument can be used to control the behaviour: when the
  497. * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
  498. * new data will be added to an existing hash table; otherwise, old
  499. * data will be discarded and a new hash table will be created.
  500. *
  501. * The separator character for the "name=value" pairs can be selected,
  502. * so we both support importing from externally stored environment
  503. * data (separated by NUL characters) and from plain text files
  504. * (entries separated by newline characters).
  505. *
  506. * To allow for nicely formatted text input, leading white space
  507. * (sequences of SPACE and TAB chars) is ignored, and entries starting
  508. * (after removal of any leading white space) with a '#' character are
  509. * considered comments and ignored.
  510. *
  511. * [NOTE: this means that a variable name cannot start with a '#'
  512. * character.]
  513. *
  514. * When using a non-NUL separator character, backslash is used as
  515. * escape character in the value part, allowing for example for
  516. * multi-line values.
  517. *
  518. * In theory, arbitrary separator characters can be used, but only
  519. * '\0' and '\n' have really been tested.
  520. */
  521. int himport_r(struct hsearch_data *htab,
  522. const char *env, size_t size, const char sep, int flag)
  523. {
  524. char *data, *sp, *dp, *name, *value;
  525. /* Test for correct arguments. */
  526. if (htab == NULL) {
  527. __set_errno(EINVAL);
  528. return 0;
  529. }
  530. /* we allocate new space to make sure we can write to the array */
  531. if ((data = malloc(size)) == NULL) {
  532. debug("himport_r: can't malloc %d bytes\n", size);
  533. __set_errno(ENOMEM);
  534. return 0;
  535. }
  536. memcpy(data, env, size);
  537. dp = data;
  538. if ((flag & H_NOCLEAR) == 0) {
  539. /* Destroy old hash table if one exists */
  540. debug("Destroy Hash Table: %p table = %p\n", htab,
  541. htab->table);
  542. if (htab->table)
  543. hdestroy_r(htab);
  544. }
  545. /*
  546. * Create new hash table (if needed). The computation of the hash
  547. * table size is based on heuristics: in a sample of some 70+
  548. * existing systems we found an average size of 39+ bytes per entry
  549. * in the environment (for the whole key=value pair). Assuming a
  550. * size of 8 per entry (= safety factor of ~5) should provide enough
  551. * safety margin for any existing environment definitions and still
  552. * allow for more than enough dynamic additions. Note that the
  553. * "size" argument is supposed to give the maximum enviroment size
  554. * (CONFIG_ENV_SIZE). This heuristics will result in
  555. * unreasonably large numbers (and thus memory footprint) for
  556. * big flash environments (>8,000 entries for 64 KB
  557. * envrionment size), so we clip it to a reasonable value.
  558. * On the other hand we need to add some more entries for free
  559. * space when importing very small buffers. Both boundaries can
  560. * be overwritten in the board config file if needed.
  561. */
  562. if (!htab->table) {
  563. int nent = CONFIG_ENV_MIN_ENTRIES + size / 8;
  564. if (nent > CONFIG_ENV_MAX_ENTRIES)
  565. nent = CONFIG_ENV_MAX_ENTRIES;
  566. debug("Create Hash Table: N=%d\n", nent);
  567. if (hcreate_r(nent, htab) == 0) {
  568. free(data);
  569. return 0;
  570. }
  571. }
  572. /* Parse environment; allow for '\0' and 'sep' as separators */
  573. do {
  574. ENTRY e, *rv;
  575. /* skip leading white space */
  576. while ((*dp == ' ') || (*dp == '\t'))
  577. ++dp;
  578. /* skip comment lines */
  579. if (*dp == '#') {
  580. while (*dp && (*dp != sep))
  581. ++dp;
  582. ++dp;
  583. continue;
  584. }
  585. /* parse name */
  586. for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
  587. ;
  588. /* deal with "name" and "name=" entries (delete var) */
  589. if (*dp == '\0' || *(dp + 1) == '\0' ||
  590. *dp == sep || *(dp + 1) == sep) {
  591. if (*dp == '=')
  592. *dp++ = '\0';
  593. *dp++ = '\0'; /* terminate name */
  594. debug("DELETE CANDIDATE: \"%s\"\n", name);
  595. if (hdelete_r(name, htab) == 0)
  596. debug("DELETE ERROR ##############################\n");
  597. continue;
  598. }
  599. *dp++ = '\0'; /* terminate name */
  600. /* parse value; deal with escapes */
  601. for (value = sp = dp; *dp && (*dp != sep); ++dp) {
  602. if ((*dp == '\\') && *(dp + 1))
  603. ++dp;
  604. *sp++ = *dp;
  605. }
  606. *sp++ = '\0'; /* terminate value */
  607. ++dp;
  608. /* enter into hash table */
  609. e.key = name;
  610. e.data = value;
  611. hsearch_r(e, ENTER, &rv, htab);
  612. if (rv == NULL) {
  613. printf("himport_r: can't insert \"%s=%s\" into hash table\n",
  614. name, value);
  615. return 0;
  616. }
  617. debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
  618. htab, htab->filled, htab->size,
  619. rv, name, value);
  620. } while ((dp < data + size) && *dp); /* size check needed for text */
  621. /* without '\0' termination */
  622. debug("INSERT: free(data = %p)\n", data);
  623. free(data);
  624. debug("INSERT: done\n");
  625. return 1; /* everything OK */
  626. }