hashtable.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735
  1. /*
  2. * This implementation is based on code from uClibc-0.9.30.3 but was
  3. * modified and extended for use within U-Boot.
  4. *
  5. * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
  6. *
  7. * Original license header:
  8. *
  9. * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
  10. * This file is part of the GNU C Library.
  11. * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
  12. *
  13. * The GNU C Library is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * The GNU C Library is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with the GNU C Library; if not, write to the Free
  25. * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  26. * 02111-1307 USA.
  27. */
  28. #include <errno.h>
  29. #include <malloc.h>
  30. #ifdef USE_HOSTCC /* HOST build */
  31. # include <string.h>
  32. # include <assert.h>
  33. # ifndef debug
  34. # ifdef DEBUG
  35. # define debug(fmt,args...) printf(fmt ,##args)
  36. # else
  37. # define debug(fmt,args...)
  38. # endif
  39. # endif
  40. #else /* U-Boot build */
  41. # include <common.h>
  42. # include <linux/string.h>
  43. #endif
  44. #ifndef CONFIG_ENV_MAX_ENTRIES /* maximum number of entries */
  45. #define CONFIG_ENV_MAX_ENTRIES 512
  46. #endif
  47. #include "search.h"
  48. /*
  49. * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
  50. * [Knuth] The Art of Computer Programming, part 3 (6.4)
  51. */
  52. /*
  53. * The non-reentrant version use a global space for storing the hash table.
  54. */
  55. static struct hsearch_data htab;
  56. /*
  57. * The reentrant version has no static variables to maintain the state.
  58. * Instead the interface of all functions is extended to take an argument
  59. * which describes the current status.
  60. */
  61. typedef struct _ENTRY {
  62. unsigned int used;
  63. ENTRY entry;
  64. } _ENTRY;
  65. /*
  66. * hcreate()
  67. */
  /*
   * The double hashing method used here needs a prime table size. To
   * correct the table size given by the user we need a prime test. Plain
   * trial division is adequate because
   * a) the code is (most probably) called only a few times per program
   *    run and
   * b) the number is small because the table must fit into memory.
   *
   * Only odd numbers may be passed; even divisors are never tried.
   * Returns 1 if no odd divisor d with 3 <= d and d * d <= number exists,
   * 0 otherwise. Note that 1 is therefore reported as prime.
   */
  static int isprime(unsigned int number)
  {
      unsigned int div;

      /*
       * "div <= number / div" is "div * div <= number" written so that it
       * cannot overflow. Returning from inside the loop (instead of testing
       * number % div once more after it) keeps 3 from being rejected just
       * because it divides itself.
       */
      for (div = 3; div <= number / div; div += 2) {
          if (number % div == 0)
              return 0;
      }
      return 1;
  }
  83. int hcreate(size_t nel)
  84. {
  85. return hcreate_r(nel, &htab);
  86. }
  87. /*
  88. * Before using the hash table we must allocate memory for it.
  89. * Test for an existing table are done. We allocate one element
  90. * more as the found prime number says. This is done for more effective
  91. * indexing as explained in the comment for the hsearch function.
  92. * The contents of the table is zeroed, especially the field used
  93. * becomes zero.
  94. */
  95. int hcreate_r(size_t nel, struct hsearch_data *htab)
  96. {
  97. /* Test for correct arguments. */
  98. if (htab == NULL) {
  99. __set_errno(EINVAL);
  100. return 0;
  101. }
  102. /* There is still another table active. Return with error. */
  103. if (htab->table != NULL)
  104. return 0;
  105. /* Change nel to the first prime number not smaller as nel. */
  106. nel |= 1; /* make odd */
  107. while (!isprime(nel))
  108. nel += 2;
  109. htab->size = nel;
  110. htab->filled = 0;
  111. /* allocate memory and zero out */
  112. htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
  113. if (htab->table == NULL)
  114. return 0;
  115. /* everything went alright */
  116. return 1;
  117. }
  118. /*
  119. * hdestroy()
  120. */
  121. void hdestroy(void)
  122. {
  123. hdestroy_r(&htab);
  124. }
  125. /*
  126. * After using the hash table it has to be destroyed. The used memory can
  127. * be freed and the local static variable can be marked as not used.
  128. */
  129. void hdestroy_r(struct hsearch_data *htab)
  130. {
  131. int i;
  132. /* Test for correct arguments. */
  133. if (htab == NULL) {
  134. __set_errno(EINVAL);
  135. return;
  136. }
  137. /* free used memory */
  138. for (i = 1; i <= htab->size; ++i) {
  139. if (htab->table[i].used) {
  140. ENTRY *ep = &htab->table[i].entry;
  141. free(ep->key);
  142. free(ep->data);
  143. }
  144. }
  145. free(htab->table);
  146. /* the sign for an existing table is an value != NULL in htable */
  147. htab->table = NULL;
  148. }
  149. /*
  150. * hsearch()
  151. */
  152. /*
  153. * This is the search function. It uses double hashing with open addressing.
  154. * The argument item.key has to be a pointer to a zero terminated, most
  155. * probably strings of chars. The function for generating a number of the
  156. * strings is simple but fast. It can be replaced by a more complex function
  157. * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
  158. *
  159. * We use a trick to speed up the lookup. The table is created by hcreate
  160. * with one more element available. This enables us to use the index zero
  161. * special. This index will never be used because we store the first hash
  162. * index in the field used where zero means not used. Every other value
  163. * means used. The used field can be used as a first fast comparison for
  164. * equality of the stored and the parameter value. This helps to prevent
  165. * unnecessary expensive calls of strcmp.
  166. *
  167. * This implementation differs from the standard library version of
  168. * this function in a number of ways:
  169. *
  170. * - While the standard version does not make any assumptions about
  171. * the type of the stored data objects at all, this implementation
  172. * works with NUL terminated strings only.
  173. * - Instead of storing just pointers to the original objects, we
  174. * create local copies so the caller does not need to care about the
  175. * data any more.
  176. * - The standard implementation does not provide a way to update an
  177. * existing entry. This version will create a new entry or update an
  178. * existing one when both "action == ENTER" and "item.data != NULL".
  179. * - Instead of returning 1 on success, we return the index into the
  180. * internal hash table, which is also guaranteed to be positive.
  181. * This allows us direct access to the found hash table slot for
  182. * example for functions like hdelete().
  183. */
  184. ENTRY *hsearch(ENTRY item, ACTION action)
  185. {
  186. ENTRY *result;
  187. (void) hsearch_r(item, action, &result, &htab);
  188. return result;
  189. }
  190. int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
  191. struct hsearch_data *htab)
  192. {
  193. unsigned int hval;
  194. unsigned int count;
  195. unsigned int len = strlen(item.key);
  196. unsigned int idx;
  197. /* Compute an value for the given string. Perhaps use a better method. */
  198. hval = len;
  199. count = len;
  200. while (count-- > 0) {
  201. hval <<= 4;
  202. hval += item.key[count];
  203. }
  204. /*
  205. * First hash function:
  206. * simply take the modul but prevent zero.
  207. */
  208. hval %= htab->size;
  209. if (hval == 0)
  210. ++hval;
  211. /* The first index tried. */
  212. idx = hval;
  213. if (htab->table[idx].used) {
  214. /*
  215. * Further action might be required according to the
  216. * action value.
  217. */
  218. unsigned hval2;
  219. if (htab->table[idx].used == hval
  220. && strcmp(item.key, htab->table[idx].entry.key) == 0) {
  221. /* Overwrite existing value? */
  222. if ((action == ENTER) && (item.data != NULL)) {
  223. free(htab->table[idx].entry.data);
  224. htab->table[idx].entry.data =
  225. strdup(item.data);
  226. if (!htab->table[idx].entry.data) {
  227. __set_errno(ENOMEM);
  228. *retval = NULL;
  229. return 0;
  230. }
  231. }
  232. /* return found entry */
  233. *retval = &htab->table[idx].entry;
  234. return idx;
  235. }
  236. /*
  237. * Second hash function:
  238. * as suggested in [Knuth]
  239. */
  240. hval2 = 1 + hval % (htab->size - 2);
  241. do {
  242. /*
  243. * Because SIZE is prime this guarantees to
  244. * step through all available indices.
  245. */
  246. if (idx <= hval2)
  247. idx = htab->size + idx - hval2;
  248. else
  249. idx -= hval2;
  250. /*
  251. * If we visited all entries leave the loop
  252. * unsuccessfully.
  253. */
  254. if (idx == hval)
  255. break;
  256. /* If entry is found use it. */
  257. if ((htab->table[idx].used == hval)
  258. && strcmp(item.key, htab->table[idx].entry.key) == 0) {
  259. /* Overwrite existing value? */
  260. if ((action == ENTER) && (item.data != NULL)) {
  261. free(htab->table[idx].entry.data);
  262. htab->table[idx].entry.data =
  263. strdup(item.data);
  264. if (!htab->table[idx].entry.data) {
  265. __set_errno(ENOMEM);
  266. *retval = NULL;
  267. return 0;
  268. }
  269. }
  270. /* return found entry */
  271. *retval = &htab->table[idx].entry;
  272. return idx;
  273. }
  274. }
  275. while (htab->table[idx].used);
  276. }
  277. /* An empty bucket has been found. */
  278. if (action == ENTER) {
  279. /*
  280. * If table is full and another entry should be
  281. * entered return with error.
  282. */
  283. if (htab->filled == htab->size) {
  284. __set_errno(ENOMEM);
  285. *retval = NULL;
  286. return 0;
  287. }
  288. /*
  289. * Create new entry;
  290. * create copies of item.key and item.data
  291. */
  292. htab->table[idx].used = hval;
  293. htab->table[idx].entry.key = strdup(item.key);
  294. htab->table[idx].entry.data = strdup(item.data);
  295. if (!htab->table[idx].entry.key ||
  296. !htab->table[idx].entry.data) {
  297. __set_errno(ENOMEM);
  298. *retval = NULL;
  299. return 0;
  300. }
  301. ++htab->filled;
  302. /* return new entry */
  303. *retval = &htab->table[idx].entry;
  304. return 1;
  305. }
  306. __set_errno(ESRCH);
  307. *retval = NULL;
  308. return 0;
  309. }
  310. /*
  311. * hdelete()
  312. */
  313. /*
  314. * The standard implementation of hsearch(3) does not provide any way
  315. * to delete any entries from the hash table. We extend the code to
  316. * do that.
  317. */
  318. int hdelete(const char *key)
  319. {
  320. return hdelete_r(key, &htab);
  321. }
  322. int hdelete_r(const char *key, struct hsearch_data *htab)
  323. {
  324. ENTRY e, *ep;
  325. int idx;
  326. debug("hdelete: DELETE key \"%s\"\n", key);
  327. e.key = (char *)key;
  328. if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
  329. __set_errno(ESRCH);
  330. return 0; /* not found */
  331. }
  332. /* free used ENTRY */
  333. debug("hdelete: DELETING key \"%s\"\n", key);
  334. free(ep->key);
  335. free(ep->data);
  336. htab->table[idx].used = 0;
  337. --htab->filled;
  338. return 1;
  339. }
  340. /*
  341. * hexport()
  342. */
  343. /*
  344. * Export the data stored in the hash table in linearized form.
  345. *
  346. * Entries are exported as "name=value" strings, separated by an
  347. * arbitrary (non-NUL, of course) separator character. This allows to
  348. * use this function both when formatting the U-Boot environment for
  349. * external storage (using '\0' as separator), but also when using it
  350. * for the "printenv" command to print all variables, simply by using
  351. * '\n' as separator. This can also be used for new features like
  352. * exporting the environment data as text file, including the option
  353. * for later re-import.
  354. *
  355. * The entries in the result list will be sorted by ascending key
  356. * values.
  357. *
  358. * If the separator character is different from NUL, then any
  359. * separator characters and backslash characters in the values will
  360. * be escaped by a preceding backslash in output. This is needed for
  361. * example to enable multi-line values, especially when the output
  362. * shall later be parsed (for example, for re-import).
  363. *
  364. * There are several options how the result buffer is handled:
  365. *
  366. * *resp size
  367. * -----------
  368. * NULL 0 A string of sufficient length will be allocated.
  369. * NULL >0 A string of the size given will be
  370. * allocated. An error will be returned if the size is
  371. * not sufficient. Any unused bytes in the string will
  372. * be '\0'-padded.
  373. * !NULL 0 The user-supplied buffer will be used. No length
  374. * checking will be performed, i. e. it is assumed that
  375. * the buffer size will always be big enough. DANGEROUS.
  376. * !NULL >0 The user-supplied buffer will be used. An error will
  377. * be returned if the size is not sufficient. Any unused
  378. * bytes in the string will be '\0'-padded.
  379. */
  380. ssize_t hexport(const char sep, char **resp, size_t size)
  381. {
  382. return hexport_r(&htab, sep, resp, size);
  383. }
  384. static int cmpkey(const void *p1, const void *p2)
  385. {
  386. ENTRY *e1 = *(ENTRY **) p1;
  387. ENTRY *e2 = *(ENTRY **) p2;
  388. return (strcmp(e1->key, e2->key));
  389. }
  390. ssize_t hexport_r(struct hsearch_data *htab, const char sep,
  391. char **resp, size_t size)
  392. {
  393. ENTRY *list[htab->size];
  394. char *res, *p;
  395. size_t totlen;
  396. int i, n;
  397. /* Test for correct arguments. */
  398. if ((resp == NULL) || (htab == NULL)) {
  399. __set_errno(EINVAL);
  400. return (-1);
  401. }
  402. debug("EXPORT table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
  403. htab, htab->size, htab->filled, size);
  404. /*
  405. * Pass 1:
  406. * search used entries,
  407. * save addresses and compute total length
  408. */
  409. for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
  410. if (htab->table[i].used) {
  411. ENTRY *ep = &htab->table[i].entry;
  412. list[n++] = ep;
  413. totlen += strlen(ep->key) + 2;
  414. if (sep == '\0') {
  415. totlen += strlen(ep->data);
  416. } else { /* check if escapes are needed */
  417. char *s = ep->data;
  418. while (*s) {
  419. ++totlen;
  420. /* add room for needed escape chars */
  421. if ((*s == sep) || (*s == '\\'))
  422. ++totlen;
  423. ++s;
  424. }
  425. }
  426. totlen += 2; /* for '=' and 'sep' char */
  427. }
  428. }
  429. #ifdef DEBUG
  430. /* Pass 1a: print unsorted list */
  431. printf("Unsorted: n=%d\n", n);
  432. for (i = 0; i < n; ++i) {
  433. printf("\t%3d: %p ==> %-10s => %s\n",
  434. i, list[i], list[i]->key, list[i]->data);
  435. }
  436. #endif
  437. /* Sort list by keys */
  438. qsort(list, n, sizeof(ENTRY *), cmpkey);
  439. /* Check if the user supplied buffer size is sufficient */
  440. if (size) {
  441. if (size < totlen + 1) { /* provided buffer too small */
  442. debug("### buffer too small: %d, but need %d\n",
  443. size, totlen + 1);
  444. __set_errno(ENOMEM);
  445. return (-1);
  446. }
  447. } else {
  448. size = totlen + 1;
  449. }
  450. /* Check if the user provided a buffer */
  451. if (*resp) {
  452. /* yes; clear it */
  453. res = *resp;
  454. memset(res, '\0', size);
  455. } else {
  456. /* no, allocate and clear one */
  457. *resp = res = calloc(1, size);
  458. if (res == NULL) {
  459. __set_errno(ENOMEM);
  460. return (-1);
  461. }
  462. }
  463. /*
  464. * Pass 2:
  465. * export sorted list of result data
  466. */
  467. for (i = 0, p = res; i < n; ++i) {
  468. char *s;
  469. s = list[i]->key;
  470. while (*s)
  471. *p++ = *s++;
  472. *p++ = '=';
  473. s = list[i]->data;
  474. while (*s) {
  475. if ((*s == sep) || (*s == '\\'))
  476. *p++ = '\\'; /* escape */
  477. *p++ = *s++;
  478. }
  479. *p++ = sep;
  480. }
  481. *p = '\0'; /* terminate result */
  482. return size;
  483. }
  484. /*
  485. * himport()
  486. */
  487. /*
  488. * Import linearized data into hash table.
  489. *
  490. * This is the inverse function to hexport(): it takes a linear list
  491. * of "name=value" pairs and creates hash table entries from it.
  492. *
  493. * Entries without "value", i. e. consisting of only "name" or
  494. * "name=", will cause this entry to be deleted from the hash table.
  495. *
  496. * The "flag" argument can be used to control the behaviour: when the
  497. * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
  498. * new data will be added to an existing hash table; otherwise, old
  499. * data will be discarded and a new hash table will be created.
  500. *
  501. * The separator character for the "name=value" pairs can be selected,
  502. * so we both support importing from externally stored environment
  503. * data (separated by NUL characters) and from plain text files
  504. * (entries separated by newline characters).
  505. *
  506. * To allow for nicely formatted text input, leading white space
  507. * (sequences of SPACE and TAB chars) is ignored, and entries starting
  508. * (after removal of any leading white space) with a '#' character are
  509. * considered comments and ignored.
  510. *
  511. * [NOTE: this means that a variable name cannot start with a '#'
  512. * character.]
  513. *
  514. * When using a non-NUL separator character, backslash is used as
  515. * escape character in the value part, allowing for example for
  516. * multi-line values.
  517. *
  518. * In theory, arbitrary separator characters can be used, but only
  519. * '\0' and '\n' have really been tested.
  520. */
  521. int himport(const char *env, size_t size, const char sep, int flag)
  522. {
  523. return himport_r(&htab, env, size, sep, flag);
  524. }
  525. int himport_r(struct hsearch_data *htab,
  526. const char *env, size_t size, const char sep, int flag)
  527. {
  528. char *data, *sp, *dp, *name, *value;
  529. /* Test for correct arguments. */
  530. if (htab == NULL) {
  531. __set_errno(EINVAL);
  532. return 0;
  533. }
  534. /* we allocate new space to make sure we can write to the array */
  535. if ((data = malloc(size)) == NULL) {
  536. debug("himport_r: can't malloc %d bytes\n", size);
  537. __set_errno(ENOMEM);
  538. return 0;
  539. }
  540. memcpy(data, env, size);
  541. dp = data;
  542. if ((flag & H_NOCLEAR) == 0) {
  543. /* Destroy old hash table if one exists */
  544. debug("Destroy Hash Table: %p table = %p\n", htab,
  545. htab->table);
  546. if (htab->table)
  547. hdestroy_r(htab);
  548. }
  549. /*
  550. * Create new hash table (if needed). The computation of the hash
  551. * table size is based on heuristics: in a sample of some 70+
  552. * existing systems we found an average size of 39+ bytes per entry
  553. * in the environment (for the whole key=value pair). Assuming a
  554. * size of 8 per entry (= safety factor of ~5) should provide enough
  555. * safety margin for any existing environment definitions and still
  556. * allow for more than enough dynamic additions. Note that the
  557. * "size" argument is supposed to give the maximum enviroment size
  558. * (CONFIG_ENV_SIZE). This heuristics will result in
  559. * unreasonably large numbers (and thus memory footprint) for
  560. * big flash environments (>8,000 entries for 64 KB
  561. * envrionment size), so we clip it to a reasonable value
  562. * (which can be overwritten in the board config file if
  563. * needed).
  564. */
  565. if (!htab->table) {
  566. int nent = size / 8;
  567. if (nent > CONFIG_ENV_MAX_ENTRIES)
  568. nent = CONFIG_ENV_MAX_ENTRIES;
  569. debug("Create Hash Table: N=%d\n", nent);
  570. if (hcreate_r(nent, htab) == 0) {
  571. free(data);
  572. return 0;
  573. }
  574. }
  575. /* Parse environment; allow for '\0' and 'sep' as separators */
  576. do {
  577. ENTRY e, *rv;
  578. /* skip leading white space */
  579. while ((*dp == ' ') || (*dp == '\t'))
  580. ++dp;
  581. /* skip comment lines */
  582. if (*dp == '#') {
  583. while (*dp && (*dp != sep))
  584. ++dp;
  585. ++dp;
  586. continue;
  587. }
  588. /* parse name */
  589. for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
  590. ;
  591. /* deal with "name" and "name=" entries (delete var) */
  592. if (*dp == '\0' || *(dp + 1) == '\0' ||
  593. *dp == sep || *(dp + 1) == sep) {
  594. if (*dp == '=')
  595. *dp++ = '\0';
  596. *dp++ = '\0'; /* terminate name */
  597. debug("DELETE CANDIDATE: \"%s\"\n", name);
  598. if (hdelete_r(name, htab) == 0)
  599. debug("DELETE ERROR ##############################\n");
  600. continue;
  601. }
  602. *dp++ = '\0'; /* terminate name */
  603. /* parse value; deal with escapes */
  604. for (value = sp = dp; *dp && (*dp != sep); ++dp) {
  605. if ((*dp == '\\') && *(dp + 1))
  606. ++dp;
  607. *sp++ = *dp;
  608. }
  609. *sp++ = '\0'; /* terminate value */
  610. ++dp;
  611. /* enter into hash table */
  612. e.key = name;
  613. e.data = value;
  614. hsearch_r(e, ENTER, &rv, htab);
  615. if (rv == NULL) {
  616. printf("himport_r: can't insert \"%s=%s\" into hash table\n",
  617. name, value);
  618. return 0;
  619. }
  620. debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
  621. htab, htab->filled, htab->size,
  622. rv, name, value);
  623. } while ((dp < data + size) && *dp); /* size check needed for text */
  624. /* without '\0' termination */
  625. debug("INSERT: free(data = %p)\n", data);
  626. free(data);
  627. debug("INSERT: done\n");
  628. return 1; /* everything OK */
  629. }