cpu.c

/*
 * Copyright 2014 Freescale Semiconductor, Inc.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <common.h>
#include <asm/io.h>
#include <asm/system.h>
#include <asm/armv8/mmu.h>
#include <asm/arch-fsl-lsch3/immap_lsch3.h>
#include "cpu.h"
#include "speed.h"

DECLARE_GLOBAL_DATA_PTR;
#ifndef CONFIG_SYS_DCACHE_OFF

/*
 * To start the MMU before DDR is available, we create the MMU table in SRAM.
 * The base address of SRAM is CONFIG_SYS_FSL_OCRAM_BASE. We use three
 * levels of translation tables here to cover the 40-bit address space.
 * We use a 4KB granule size, with 40-bit physical addresses, T0SZ=24.
 * Level 0 IA[39],    table address @0
 * Level 1 IA[38:30], table addresses @0x1000, 0x2000
 * Level 2 IA[29:21], table address @0x3000
 */
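/*
 * Worked example (editor's illustration, not part of the original source),
 * using CONFIG_SYS_IFC_BASE defined below. With the 4KB granule the index
 * split is VA[39], VA[38:30], VA[29:21], so VA 0x30000000 decomposes as:
 *   level 0 index = 0x30000000 >> 39           = 0
 *   level 1 index = (0x30000000 >> 30) & 0x1ff = 0
 *   level 2 index = (0x30000000 >> 21) & 0x1ff = 0x180
 * i.e. the IFC window starts at entry 0x180 of the level 2 table, which is
 * exactly the entry range early_mmu_setup() rewrites in its IFC loop.
 */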
#define SECTION_SHIFT_L0	39UL
#define SECTION_SHIFT_L1	30UL
#define SECTION_SHIFT_L2	21UL
#define BLOCK_SIZE_L0		0x8000000000UL
#define BLOCK_SIZE_L1		(1 << SECTION_SHIFT_L1)
#define BLOCK_SIZE_L2		(1 << SECTION_SHIFT_L2)
#define CONFIG_SYS_IFC_BASE	0x30000000
#define CONFIG_SYS_IFC_SIZE	0x10000000
#define CONFIG_SYS_IFC_BASE2	0x500000000
#define CONFIG_SYS_IFC_SIZE2	0x100000000

#define TCR_EL2_PS_40BIT	(2 << 16)
#define LSCH3_VA_BITS		(40)
#define LSCH3_TCR	(TCR_TG0_4K		| \
			TCR_EL2_PS_40BIT	| \
			TCR_SHARED_NON		| \
			TCR_ORGN_NC		| \
			TCR_IRGN_NC		| \
			TCR_T0SZ(LSCH3_VA_BITS))
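
/*
 * Editor's note (illustration, not in the original source): with
 * LSCH3_VA_BITS = 40 the intent is a T0SZ of 64 - 40 = 24, matching the
 * T0SZ=24 mentioned in the comment above. Table walks are configured
 * non-shareable and non-cacheable (TCR_SHARED_NON, TCR_ORGN_NC,
 * TCR_IRGN_NC), which is the safe choice because early_mmu_setup() runs
 * before the data cache is enabled in arch_cpu_init().
 */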

/*
 * Final MMU
 * Let's start from the same layout as the early MMU and modify it as needed.
 * IFC regions will be cache-inhibited.
 */
#define FINAL_QBMAN_CACHED_MEM	0x818000000UL
#define FINAL_QBMAN_CACHED_SIZE	0x4000000

static inline void early_mmu_setup(void)
{
	int el;
	u64 i;
	u64 section_l1t0, section_l1t1, section_l2;
	u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE;
	u64 *level1_table_0 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000);
	u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000);
	u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000);

	level0_table[0] =
		(u64)level1_table_0 | PMD_TYPE_TABLE;
	level0_table[1] =
		(u64)level1_table_1 | PMD_TYPE_TABLE;

	/*
	 * set level 1 table 0 to cache_inhibit, covering 0 to 512GB
	 * set level 1 table 1 to cache enabled, covering 512GB to 1TB
	 * set level 2 table to cache-inhibit, covering 0 to 1GB
	 */
	section_l1t0 = 0;
	section_l1t1 = BLOCK_SIZE_L0;
	section_l2 = 0;
	for (i = 0; i < 512; i++) {
		set_pgtable_section(level1_table_0, i, section_l1t0,
				    MT_DEVICE_NGNRNE);
		set_pgtable_section(level1_table_1, i, section_l1t1,
				    MT_NORMAL);
		set_pgtable_section(level2_table, i, section_l2,
				    MT_DEVICE_NGNRNE);
		section_l1t0 += BLOCK_SIZE_L1;
		section_l1t1 += BLOCK_SIZE_L1;
		section_l2 += BLOCK_SIZE_L2;
	}

	level1_table_0[0] =
		(u64)level2_table | PMD_TYPE_TABLE;
	level1_table_0[1] =
		0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT |
		PMD_ATTRINDX(MT_DEVICE_NGNRNE);
	level1_table_0[2] =
		0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
		PMD_ATTRINDX(MT_NORMAL);
	level1_table_0[3] =
		0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
		PMD_ATTRINDX(MT_NORMAL);

	/* Rewrite table to enable cache */
	set_pgtable_section(level2_table,
			    CONFIG_SYS_FSL_OCRAM_BASE >> SECTION_SHIFT_L2,
			    CONFIG_SYS_FSL_OCRAM_BASE,
			    MT_NORMAL);
	for (i = CONFIG_SYS_IFC_BASE >> SECTION_SHIFT_L2;
	     i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE)
	     >> SECTION_SHIFT_L2; i++) {
		section_l2 = i << SECTION_SHIFT_L2;
		set_pgtable_section(level2_table, i,
				    section_l2, MT_NORMAL);
	}

	el = current_el();
	set_ttbr_tcr_mair(el, (u64)level0_table, LSCH3_TCR, MEMORY_ATTRIBUTES);
	set_sctlr(get_sctlr() | CR_M);
}

/*
 * This final table looks similar to the early table, but differs in detail.
 * These tables are in regular memory. Cache on IFC is disabled. One sub-table
 * is added to enable cache for QBMan.
 */
static inline void final_mmu_setup(void)
{
	int el;
	u64 i, tbl_base, tbl_limit, section_base;
	u64 section_l1t0, section_l1t1, section_l2;
	u64 *level0_table = (u64 *)gd->arch.tlb_addr;
	u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000);
	u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000);
	u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000);
	u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000);

	level0_table[0] =
		(u64)level1_table_0 | PMD_TYPE_TABLE;
	level0_table[1] =
		(u64)level1_table_1 | PMD_TYPE_TABLE;

	/*
	 * set level 1 table 0 to cache_inhibit, covering 0 to 512GB
	 * set level 1 table 1 to cache enabled, covering 512GB to 1TB
	 * set level 2 table 0 to cache-inhibit, covering 0 to 1GB
	 */
	section_l1t0 = 0;
	section_l1t1 = BLOCK_SIZE_L0;
	section_l2 = 0;
	for (i = 0; i < 512; i++) {
		set_pgtable_section(level1_table_0, i, section_l1t0,
				    MT_DEVICE_NGNRNE);
		set_pgtable_section(level1_table_1, i, section_l1t1,
				    MT_NORMAL);
		set_pgtable_section(level2_table_0, i, section_l2,
				    MT_DEVICE_NGNRNE);
		section_l1t0 += BLOCK_SIZE_L1;
		section_l1t1 += BLOCK_SIZE_L1;
		section_l2 += BLOCK_SIZE_L2;
	}

	level1_table_0[0] =
		(u64)level2_table_0 | PMD_TYPE_TABLE;
	level1_table_0[2] =
		0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
		PMD_ATTRINDX(MT_NORMAL);
	level1_table_0[3] =
		0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
		PMD_ATTRINDX(MT_NORMAL);

	/* Rewrite table to enable cache */
	set_pgtable_section(level2_table_0,
			    CONFIG_SYS_FSL_OCRAM_BASE >> SECTION_SHIFT_L2,
			    CONFIG_SYS_FSL_OCRAM_BASE,
			    MT_NORMAL);

	/*
	 * Fill in other parts of the tables if cache is needed.
	 * If finer granularity than 1GB is needed, a sub-table
	 * should be created.
	 */
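	/*
	 * Worked example (editor's illustration, derived from the defines
	 * above, not part of the original source):
	 * FINAL_QBMAN_CACHED_MEM = 0x818000000, so
	 *   section_base  = 0x818000000 & ~(1GB - 1) = 0x800000000
	 *   level 1 index = 0x800000000 >> 30        = 32
	 * level1_table_0[32] becomes a table entry pointing at
	 * level2_table_1, and the second loop below rewrites 2MB entries
	 *   0x18000000 >> 21 = 0xc0  up to  0x1c000000 >> 21 = 0xe0
	 * (entries 192..223, i.e. the 64MB QBMan region) as MT_NORMAL,
	 * while the rest of that 1GB block stays device memory.
	 */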
	section_base = FINAL_QBMAN_CACHED_MEM & ~(BLOCK_SIZE_L1 - 1);
	i = section_base >> SECTION_SHIFT_L1;
	level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE;
	section_l2 = section_base;
	for (i = 0; i < 512; i++) {
		set_pgtable_section(level2_table_1, i, section_l2,
				    MT_DEVICE_NGNRNE);
		section_l2 += BLOCK_SIZE_L2;
	}
	tbl_base = FINAL_QBMAN_CACHED_MEM & (BLOCK_SIZE_L1 - 1);
	tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) &
		    (BLOCK_SIZE_L1 - 1);
	for (i = tbl_base >> SECTION_SHIFT_L2;
	     i < tbl_limit >> SECTION_SHIFT_L2; i++) {
		section_l2 = section_base + (i << SECTION_SHIFT_L2);
		set_pgtable_section(level2_table_1, i,
				    section_l2, MT_NORMAL);
	}

	/* flush new MMU table */
	flush_dcache_range(gd->arch.tlb_addr,
			   gd->arch.tlb_addr + gd->arch.tlb_size);

	/* point TTBR to the new table */
	el = current_el();
	asm volatile("dsb sy");
	if (el == 1) {
		asm volatile("msr ttbr0_el1, %0"
			     : : "r" ((u64)level0_table) : "memory");
	} else if (el == 2) {
		asm volatile("msr ttbr0_el2, %0"
			     : : "r" ((u64)level0_table) : "memory");
	} else if (el == 3) {
		asm volatile("msr ttbr0_el3, %0"
			     : : "r" ((u64)level0_table) : "memory");
	} else {
		hang();
	}
	asm volatile("isb");

	/*
	 * The MMU is already enabled, so we only need to invalidate the TLB
	 * to load the new table. The new table is compatible with the current
	 * table; even if the MMU walks the new table before the TLB is
	 * invalidated, it still works. So we don't need to turn off the MMU
	 * here. The caller, enable_caches(), invalidates the TLB right after
	 * this function returns.
	 */
}

int arch_cpu_init(void)
{
	icache_enable();
	__asm_invalidate_dcache_all();
	__asm_invalidate_tlb_all();
	early_mmu_setup();
	set_sctlr(get_sctlr() | CR_C);

	return 0;
}

/*
 * flush_l3_cache
 * The Dickens L3 cache can be flushed by transitioning from FAM to SFONLY
 * power state, by writing to the HN-F P-state request register.
 * Fixme: This function should be moved to a common file if other SoCs also
 * use the same Dickens.
 */
#define HNF0_PSTATE_REQ	0x04200010
#define HNF1_PSTATE_REQ	0x04210010
#define HNF2_PSTATE_REQ	0x04220010
#define HNF3_PSTATE_REQ	0x04230010
#define HNF4_PSTATE_REQ	0x04240010
#define HNF5_PSTATE_REQ	0x04250010
#define HNF6_PSTATE_REQ	0x04260010
#define HNF7_PSTATE_REQ	0x04270010
#define HNFPSTAT_MASK	(0xFFFFFFFFFFFFFFFC)
#define HNFPSTAT_FAM	0x3
#define HNFPSTAT_SFONLY	0x01
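
/*
 * Editor's note (descriptive comment, derived from the defines and code
 * below, not part of the original source): the eight HN-F node register
 * blocks are spaced 0x10000 apart, and each P-state request register sits
 * at offset 0x10 within its node. hnf_pstate_req() writes the requested
 * state into the request register and then polls the 64-bit register that
 * immediately follows it until the requested P-state is reported or the
 * timeout expires.
 */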
static void hnf_pstate_req(u64 *ptr, u64 state)
{
	int timeout = 1000;

	out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
	ptr++;
	/* checking if the transition is completed */
	while (timeout > 0) {
		if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
			break;
		udelay(100);
		timeout--;
	}
}

void flush_l3_cache(void)
{
	hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
	hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
	hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
}

/*
 * This function is called from lib/board.c.
 * It recreates the MMU table in main memory. MMU and d-cache are enabled
 * earlier. There is no need to disable the d-cache for this operation.
 */
void enable_caches(void)
{
	final_mmu_setup();
	__asm_invalidate_tlb_all();
}
#endif
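
/*
 * Editor's note (descriptive comment, not in the original source): each
 * TP cluster register packs one initiator index per byte; initiator_type()
 * extracts the index for initiator init_id, looks up its type word in
 * gur->tp_ityp[], and returns that word only if the "available" bit
 * (TP_ITYP_AV) is set, otherwise 0.
 */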
static inline u32 initiator_type(u32 cluster, int init_id)
{
	struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
	u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
	u32 type = in_le32(&gur->tp_ityp[idx]);

	if (type & TP_ITYP_AV)
		return type;

	return 0;
}
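
/*
 * Editor's note (descriptive comment, not in the original source):
 * cpu_mask() walks the cluster registers until the end-of-cluster flag
 * (TP_CLUSTER_EOC) is seen and returns a bitmap with one bit set per
 * initiator whose type is an ARM core; cpu_numcores() below simply counts
 * those bits with hweight32().
 */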
u32 cpu_mask(void)
{
	struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
	int i = 0, count = 0;
	u32 cluster, type, mask = 0;

	do {
		int j;
		cluster = in_le32(&gur->tp_cluster[i].lower);
		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
			type = initiator_type(cluster, j);
			if (type) {
				if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
					mask |= 1 << count;
				count++;
			}
		}
		i++;
	} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);

	return mask;
}

/*
 * Return the number of cores on this SOC.
 */
int cpu_numcores(void)
{
	return hweight32(cpu_mask());
}

int fsl_qoriq_core_to_cluster(unsigned int core)
{
	struct ccsr_gur __iomem *gur =
		(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
	int i = 0, count = 0;
	u32 cluster;

	do {
		int j;
		cluster = in_le32(&gur->tp_cluster[i].lower);
		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
			if (initiator_type(cluster, j)) {
				if (count == core)
					return i;
				count++;
			}
		}
		i++;
	} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);

	return -1;	/* cannot identify the cluster */
}

u32 fsl_qoriq_core_to_type(unsigned int core)
{
	struct ccsr_gur __iomem *gur =
		(void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
	int i = 0, count = 0;
	u32 cluster, type;

	do {
		int j;
		cluster = in_le32(&gur->tp_cluster[i].lower);
		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
			type = initiator_type(cluster, j);
			if (type) {
				if (count == core)
					return type;
				count++;
			}
		}
		i++;
	} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);

	return -1;	/* cannot identify the cluster */
}

#ifdef CONFIG_DISPLAY_CPUINFO
int print_cpuinfo(void)
{
	struct sys_info sysinfo;
	char buf[32];
	unsigned int i, core;
	u32 type;

	get_sys_info(&sysinfo);
	puts("Clock Configuration:");
	for_each_cpu(i, core, cpu_numcores(), cpu_mask()) {
		if (!(i % 3))
			puts("\n ");
		type = TP_ITYP_VER(fsl_qoriq_core_to_type(core));
		printf("CPU%d(%s):%-4s MHz ", core,
		       type == TY_ITYP_VER_A7 ? "A7 " :
		       (type == TY_ITYP_VER_A53 ? "A53" :
			(type == TY_ITYP_VER_A57 ? "A57" : " ")),
		       strmhz(buf, sysinfo.freq_processor[core]));
	}
	printf("\n Bus: %-4s MHz ",
	       strmhz(buf, sysinfo.freq_systembus));
	printf("DDR: %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus));
	puts("\n");

	return 0;
}
#endif