
#include <linux/linkage.h>
#include <mach/hardware.h>
#include <mach/platform.h>
#include "pm.h"

/*
 * Base addresses (as produced by IO_ADDRESS()) of the register blocks that
 * are accessed with the DMW_CMU_* and DMW_SYSCFG_* offsets below.
 */
#define CMU_BASE			(IO_ADDRESS(DMW_CMU_BASE))
#define SYSCFG_BASE			(IO_ADDRESS(DMW_SYSCFG_BASE))

/*
 * Cache line geometry (64 byte lines). Used to align the start of the
 * prefetched code range and to round its length to whole lines.
 */
#define CACHE_LINE_SHIFT		6
#define CACHE_LINE_SIZE			(1 << CACHE_LINE_SHIFT)

/*
 * DDR controller registers. The *_reg values are byte offsets relative to
 * the DENALI_BASE argument, the *_msk values select the field inside the
 * register and the *_offset values give the bit position of that field.
 */
#define lowpower_control_reg		(20*4)
#define lowpower_control_msk		(0x1f << 8)
#define lowpower_control_offset		8
#define lowpower_auto_enable_reg	(22*4)
#define lowpower_auto_enable_msk	(0x1f << 0)
#define lowpower_auto_enable_offset	0
#define phy_pad_ctrl_reg_0_reg		(0x81*4)
#define phy_pad_ctrl_reg_0_pad_type	(1 << 8)

/*
 * CMU bits. PLL_PD is set in a PLLxCONTROL register to switch that PLL off,
 * PLL_LD is polled in the same register after the PLL was switched on again.
 * SWCLKENR2_DRAM is the DRAM controller clock enable bit in SWCLKENR2.
 */
#define DMW_CMU_PLL_PD			(1 << 15)
#define DMW_CMU_PLL_LD			(1 << 18)
#define DMW_CMU_SWCLKENR2_DRAM		(1 << 12)

/*
 * Busy indications in DMW_SYSCFG_DRAMCTL_GCR1. All of them have to be clear
 * before the DRAM controller is considered idle.
 */
#define DMW_SYSCFG_DRAM_CTL_BUSY	(1 << 29)
#define DMW_SYSCFG_DRAM_PORT_BUSY	(0xff << 20)

/*
 * Register offsets inside one GPIO bank: current data, set bits, clear bits.
 */
#define GPIO_DATA			0x00
#define GPIO_DATA_SET			0x04
#define GPIO_DATA_CLR			0x08

	.text

/*
 * parse_gpio gpio_reg, base_reg
 *
 * Translate a GPIO number into a (bank address, bit mask) pair.
 *
 * On entry:  gpio_reg holds the GPIO number, base_reg the GPIO base address.
 * On exit:   base_reg points to the bank that contains the GPIO (banks are
 *            0x60 bytes apart and hold 32 GPIOs each) and gpio_reg holds the
 *            bit mask of the GPIO inside that bank.
 *
 * A negative GPIO number is treated as invalid: the bit mask becomes zero
 * and base_reg is left untouched.
 *
 * Clobbers r4, r5 and the condition flags, so neither argument may be r4 or
 * r5.
 */
.macro parse_gpio gpio_reg base_reg
	cmp	\gpio_reg, #0
	movmi	\gpio_reg, #0			@ invalid GPIO -> empty bit mask
	bmi	10f

	mov	r5, \gpio_reg, lsr #5		@ r5 = bank index
	mov	r4, #0x60			@ r4 = distance between two banks
	mla	\base_reg, r4, r5, \base_reg	@ \base_reg += r4 * r5
	and	r4, \gpio_reg, #0x1f		@ r4 = bit position inside the bank
	mov	\gpio_reg, #1
	mov	\gpio_reg, \gpio_reg, lsl r4	@ \gpio_reg = 1 << r4
10:
.endm

/*
 * dmw_sleep_selfrefresh(u32 mode, u32 denali_base, u32 overdrive)
 *
 * NOTE(review): the prototype above lists three arguments, but the code also
 * consumes r3 (DRAM RTT GPIO) as an input. Confirm against the C declaration.
 *
 * Puts the RAM into self refresh, optionally slows down or stops the DRAM
 * and CPU clocks, executes WFI and undoes all of it again after wake-up.
 *
 * The whole function runs out of ICACHE and TLB because no memory access is
 * possible after the RAM is in self refresh mode.
 *
 * Must be called with IRQs and FIQs disabled!
 *
 * Bits of "mode" that are evaluated here:
 *   bits 7..0            value written into the DDR controller low power
 *                        fields
 *   __SLEEP_DDR_12M      prefetch the code, gate the DRAM controller clock,
 *                        switch the DRAM clock source and clear the RTT GPIO
 *   __SLEEP_PLL3_OFF     additionally power down PLL3
 *   __SLEEP_CPU_12M      switch the CPU clock down while sleeping
 *   __SLEEP_PLL2_OFF     power down PLL2 while sleeping
 *   SLEEP_QUIRK_DIS_INP  switch the DDR pad type while sleeping
 *
 * r0: mode
 * r1: DENALI_BASE
 * r2: overdrive GPIO
 * r3: DRAM RTT GPIO
 * r4, r5, r6, r7: scratch registers
 * r8: CMU_BASE
 * r9: SYSCFG_BASE
 * r10: OVERDRIVE dmw_gpio_base
 * r11: DRAM RTT dmw_gpio_base
 *
 * Values that are kept in registers across the WFI (nothing can be spilled
 * to memory while the RAM is in self refresh):
 * r2: overdrive bit mask to set again (zero if overdrive was not asserted)
 * r3: DRAM RTT bit mask
 * r4: previous PLL3CONTROL value (loaded only on the __SLEEP_PLL3_OFF path)
 * r5: previous CPUCLKCNTRL value (loaded only if __SLEEP_CPU_12M is set)
 * r6: previous PLL2CONTROL value (loaded only if __SLEEP_PLL2_OFF is set)
 */
ENTRY(dmw_sleep_selfrefresh)
	push	{r4-r11}			@ preserve the registers used below
	ldr	r8, =CMU_BASE
	ldr	r9, =SYSCFG_BASE
	ldr	r10, =dmw_gpio_base
	ldr	r10, [r10]			@ r10 = virtual address of DMW_GPIO_BASE
	mov	r11, r10			@ r11 = virtual address of DMW_GPIO_BASE

	@ skip cache prefetching if DRAM is kept accessible

	tst	r0, #__SLEEP_DDR_12M
	beq	.Lskip_prefetch

	/* Set r10 = overdrive gpio bank base. r2 = overdrive gpio bitmask */
	parse_gpio r2, r10

	/* Set r11 = DRAM RTT gpio bank base. r3 = DRAM RTT gpio bitmask */
	parse_gpio r3, r11

	/*
	 * r4 = start of the code range to prefetch, rounded down to a cache
	 * line boundary. r5 = end of the code range.
	 */
	adr	r4, .L__start_of_prefetch
	adr	r5, .L__end_of_prefetch
	bic	r4, r4, #(CACHE_LINE_SIZE-1)

	/*
	 * Prefetch ITLB. The kernel uses 1M page tables for its code so it's
	 * enough to prefetch start and end of function. We do it just for the
	 * end because the start is already cached (we already execute it right
	 * now).
	 */
	mcr	p15, 0, r5, c10, c1, 1		@ Prefetch I-TLB

	/*
	 * Prefetch DTLB of peripherals. Invalidate the entire DTLB first
	 * because the entries might evict each other by the prefetch. By
	 * cleaning the complete DTLB we make sure that no entries fall off the
	 * end.
	 *
	 * NOTE(review): only r10 is prefetched for the GPIO block, r11 is not.
	 * Presumably both GPIO banks are covered by the same translation -
	 * confirm.
	 */
	mcr	p15, 0, r0, c8, c6, 0		@ Invalidate D-TLB
	mcr	p15, 0, r1, c10, c1, 0		@ Prefetch D-TLB of DDR controller
	mcr	p15, 0, r8, c10, c1, 0		@ Prefetch D-TLB of CMU
	mcr	p15, 0, r9, c10, c1, 0		@ Prefetch D-TLB of SYSCFG
	mcr	p15, 0, r10, c10, c1, 0		@ Prefetch D-TLB of GPIO

	/*
	 * Prefetch the whole function into L2CACHE. We're using the L2 preload
	 * engine to first clean the involved cache lines before filling them.
	 * Note that doing a simple loop to "ldr" a word from every cache line
	 * is dangerous because it might evict dirty cache lines which the CPU
	 * writes back asynchronously!
	 */
	sub	r5, r5, r4			@ calculate number of cache lines
	bic	r5, r5, #(CACHE_LINE_SIZE-1)	@ the PLE actually transfers n+1 lines

	mov	r6, #0
	mov	r7, #(1 << 30)

	@ clean and invalidate the involved cache lines

	mcr	p15, 0, r6, c11, c3, 2		@ Clear PLE
	mcr	p15, 0, r4, c11, c5, 0		@ start address
	mcr	p15, 0, r5, c11, c7, 0		@ end address
	mcr	p15, 0, r7, c11, c4, 0		@ transfer L2 -> DRAM
	mcr	p15, 0, r6, c11, c3, 1		@ start
1:
	mrc	p15, 0, r7, c11, c8, 0		@ poll until the status reads 3
	teq	r7, #3
	bne	1b

	@ now fill the cache lines

	mcr	p15, 0, r6, c11, c3, 2		@ Clear PLE
	mcr	p15, 0, r4, c11, c5, 0		@ start address
	mcr	p15, 0, r5, c11, c7, 0		@ end address
	mcr	p15, 0, r6, c11, c4, 0		@ transfer DRAM -> L2
	mcr	p15, 0, r6, c11, c3, 1		@ start
1:
	mrc	p15, 0, r7, c11, c8, 0		@ poll until the status reads 3
	teq	r7, #3
	bne	1b

.L__start_of_prefetch:
	/*
	 * Wait for DRAM controller to get idle. Throw a panic if this does not
	 * happen in a reasonable time.
	 *
	 * r5 is the remaining number of polls. The loop is left as soon as
	 * neither the controller busy bit nor any port busy bit is set. r4
	 * holds the last status value and is reported by the panic path.
	 */
	mov	r5, #0x100000
1:
	subs	r5, r5, #1
	beq	.Ldram_lockup
	ldr	r4, [r9, #DMW_SYSCFG_DRAMCTL_GCR1]
	tst	r4, #DMW_SYSCFG_DRAM_CTL_BUSY
	tsteq	r4, #DMW_SYSCFG_DRAM_PORT_BUSY
	bne	1b

.Lskip_prefetch:
	/*
	 * Put the DDR controller in low power mode. If we switch off PLL3 we
	 * use manual mode, otherwise automatic mode.
	 *
	 * The flags set by the tst below stay valid up to the final beq
	 * because none of the instructions in between updates them. The
	 * auto enable field is always cleared and only filled with the mode
	 * if __SLEEP_DDR_12M is not set. The mode is always ORed into the
	 * control field.
	 */
	and	r5, r0, #0xff			@ extract desired mode
	tst	r0, #__SLEEP_DDR_12M

	ldr	r4, [r1, #lowpower_auto_enable_reg]
	bic	r4, r4, #lowpower_auto_enable_msk
	orreq	r4, r4, r5, lsl #lowpower_auto_enable_offset
	str	r4, [r1, #lowpower_auto_enable_reg]
	ldr	r4, [r1, #lowpower_control_reg]
	orr	r4, r4, r5, lsl #lowpower_control_offset
	str	r4, [r1, #lowpower_control_reg]
	beq	.Lkeep_ddr_fast

	/*
	 * SLEEP_QUIRK_DIS_INP: switch pads to DDR1. Otherwise the DDR pads
	 * will consume too much power!
	 */
	tst	r0, #SLEEP_QUIRK_DIS_INP
	ldrne	r4, [r1, #phy_pad_ctrl_reg_0_reg]
	bicne	r4, r4, #phy_pad_ctrl_reg_0_pad_type
	strne	r4, [r1, #phy_pad_ctrl_reg_0_reg]
	dmb

	/*
	 * Switch off DRAM controller clock. Don't check if the write succeeded
	 * because this should always work and even if it fails we would just
	 * consume a bit too much power for a short time.
	 *
	 * NOTE(review): 0x90 is written to WRPR right before every SWCLKENR2
	 * update. Presumably this unlocks the CMU write protection - confirm.
	 */
	mov	r6, #0x90
	ldr	r4, [r8, #DMW_CMU_SWCLKENR2]
	bic	r4, r4, #DMW_CMU_SWCLKENR2_DRAM
	str	r6, [r8, #DMW_CMU_WRPR]
	str	r4, [r8, #DMW_CMU_SWCLKENR2]

	/*
	 * Switch DRAM to 12Mhz, wait for switch to happen, and turn off PLL3
	 */
	ldr	r6, [r8, #DMW_CMU_CLKSWCNTRL]
	bic	r6, r6, #1
	str	r6, [r8, #DMW_CMU_CLKSWCNTRL]

1:
	ldr	r6, [r8, #DMW_CMU_CUSTATR]	@ wait until bit 6 reads as zero
	tst	r6, #(1 << 6)
	bne	1b

	tst	r0, #__SLEEP_PLL3_OFF
	ldrne	r4, [r8, #DMW_CMU_PLL3CONTROL]	@ r4 = value to restore after WFI
	orrne	r5, r4, #DMW_CMU_PLL_PD
	strne	r5, [r8, #DMW_CMU_PLL3CONTROL]

.Lkeep_ddr_fast:

	/*
	 * Optionally switch CPU to 12MHz and disable PLL2.
	 *
	 * The unmodified register values stay in r5 (CPUCLKCNTRL) and r6
	 * (PLL2CONTROL) so that they can be written back after the WFI.
	 */
	tst	r0, #__SLEEP_CPU_12M
	ldrne	r5, [r8, #DMW_CMU_CPUCLKCNTRL]
	bicne	r7, r5, #(3 << 8)
	strne	r7, [r8, #DMW_CMU_CPUCLKCNTRL]

	tst	r0, #__SLEEP_PLL2_OFF
	ldrne	r6, [r8, #DMW_CMU_PLL2CONTROL]
	orrne	r7, r6, #DMW_CMU_PLL_PD
	strne	r7, [r8, #DMW_CMU_PLL2CONTROL]

	/*
	 * If CPU *and* DDR are running on 12MHz we might clear the overdrive
	 * too. If the DDR is in manual self refresh we will disable RTT.
	 *
	 * The second test is only executed if the first one matched, so the
	 * "ne" condition after the tstne means that both flags are set. r2 is
	 * reduced to the overdrive bit that is currently set, which makes the
	 * restore after the WFI a no-op if overdrive was not asserted.
	 */
	tst	r0, #__SLEEP_DDR_12M
	strne	r3, [r11, #GPIO_DATA_CLR]	@ clear RTT
	tstne	r0, #__SLEEP_CPU_12M
	ldrne	r7, [r10, #GPIO_DATA]		@ get old state
	andne	r2, r2, r7
	strne	r2, [r10, #GPIO_DATA_CLR]	@ clear it

	/*
	 * Zzzzzzz
	 */
	dsb
	wfi

	/*
	 * Restart PLL2 and PLL3 simultaneously. Bring back CPU to full
	 * frequency as fast as possible. Restore overdrive before if it was
	 * disabled.
	 *
	 * The first strne still evaluates the flags that were set before the
	 * WFI (overdrive was cleared only if both 12MHz flags are set).
	 *
	 * NOTE(review): r4 holds the saved PLL3CONTROL value only if
	 * __SLEEP_DDR_12M was set as well, otherwise the PLL3 power down code
	 * above was skipped. Presumably callers never pass __SLEEP_PLL3_OFF
	 * without __SLEEP_DDR_12M - verify against the mode definitions.
	 */
	strne	r2, [r10, #GPIO_DATA_SET]
	tst	r0, #__SLEEP_PLL3_OFF
	strne	r4, [r8, #DMW_CMU_PLL3CONTROL]
	tst	r0, #__SLEEP_PLL2_OFF
	strne	r6, [r8, #DMW_CMU_PLL2CONTROL]
	beq	3f				@ skip PLL2 LD if it was never disabled
2:
	ldr	r4, [r8, #DMW_CMU_PLL2CONTROL]	@ wait until PLL2 reports LD
	tst	r4, #DMW_CMU_PLL_LD
	beq	2b

3:
	tst	r0, #__SLEEP_CPU_12M
	strne	r5, [r8, #DMW_CMU_CPUCLKCNTRL]	@ Switch CPU back at full freq

	/*
	 * Restore DRAM clock after PLL3 got stable.
	 */
	tst	r0, #__SLEEP_DDR_12M
	beq	4f
1:
	ldr	r4, [r8, #DMW_CMU_PLL3CONTROL]	@ wait until PLL3 reports LD
	tst	r4, #DMW_CMU_PLL_LD
	beq	1b

	ldr	r6, [r8, #DMW_CMU_CLKSWCNTRL]
	orr r6, r6, #1
	str	r6, [r8, #DMW_CMU_CLKSWCNTRL]

1:
	ldr	r6, [r8, #DMW_CMU_CUSTATR]	@ wait until bit 6 reads as one
	tst	r6, #(1 << 6)
	beq	1b

	/*
	 * Re-enable DRAM controller clock. Make sure the CMU write succeeded
	 * because the CSS might disturb the WRPR when reading from the CMU.
	 */
	mov	r6, #0x90
	ldr	r4, [r8, #DMW_CMU_SWCLKENR2]
	orr	r4, r4, #DMW_CMU_SWCLKENR2_DRAM
1:
	str	r6, [r8, #DMW_CMU_WRPR]
	str	r4, [r8, #DMW_CMU_SWCLKENR2]
	ldr	r7, [r8, #DMW_CMU_SWCLKENR2]	@ make sure write succeeded
	cmp	r4, r7
	bne	1b

	str	r3, [r11, #GPIO_DATA_SET]
	dsb					@ make sure RTT is back

	/*
	 * Switch back to DDR2 and exit low power mode.
	 */
	tst	r0, #SLEEP_QUIRK_DIS_INP
	ldrne	r4, [r1, #phy_pad_ctrl_reg_0_reg]
	orrne	r4, r4, #phy_pad_ctrl_reg_0_pad_type
	strne	r4, [r1, #phy_pad_ctrl_reg_0_reg]
4:
	ldr	r5, [r1, #lowpower_control_reg]
	bic	r5, #lowpower_control_msk
	str	r5, [r1, #lowpower_control_reg]
	dsb					@ wait until DRAM is back

	/*
	 * Fill one cache line (16 instructions) to avoid fetching instructions
	 * when the DRAM is still off.
	 */
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop

.L__end_of_prefetch:
	pop	{r4-r11}
	mov	pc, lr				@ done

	/*
	 * The DRAM controller did not get idle in time. Report the last status
	 * value (r4) through panic(). The registers pushed on entry are not
	 * restored here, presumably because panic() is not expected to return.
	 */
.Ldram_lockup:
	ldr	r0, =.Ldram_lockup_msg
	mov	r1, r4
	b	panic

ENDPROC(dmw_sleep_selfrefresh)

	.section ".rodata"

/*
 * Format string handed to panic() when the DRAM controller stays busy. The
 * single argument is the last value read from DMW_SYSCFG_DRAMCTL_GCR1.
 */
.Ldram_lockup_msg:
	.asciz	"dmw_sleep_selfrefresh: DRAM doesn't get idle (0x%08lX)\n"

