Prop2 ROM code

cgracey · 2016-07-17 01:33

I've got the Prop2 ROM code all optimized now.

'****************************************
'*					*
'*	Propeller II ROM Booter		*
'*					*
'*	Version 0.1			*
'*					*
'*	07/16/2016			*
'*					*
'****************************************

CON

	' Boot pins, listed in ascending pin order (all are accessed through the
	' inb/outb/dirb registers by the booter)
	spi_do	=	58	'SPI flash data out (sampled by the booter)
	spi_di	=	59	'SPI flash data in  (driven by the booter)
	spi_ck	=	60	'SPI flash clock
	spi_cs	=	61	'SPI flash chip select
	tx_pin	=	62	'serial transmit
	rx_pin	=	63	'serial receive

	' Value loaded into lfsr for the final 8 handshake bits; each id value
	' results in a different version being reported to the serial host
	id	=	$52	'version $20
'	id	=	$8F	'version $21
'	id	=	$BC	'version $22
'	id	=	$61	'version $23
'	id	=	$78	'version $24
'	id	=	$A5	'version $25
'	id	=	$96	'version $26
'	id	=	$4B	'version $27

DAT		org
'
'
' Read fuses
'
' First code after org. Reads 256 fuse bits one at a time, collects them 32 per
' long in x (rcr shifts each new bit in from the top), and writes every completed
' long to hub RAM through ptra, starting at $800: 8 longs, $800..$81F.
' fuse_read starts at $200 and is incremented once per fuse, so it ends at $300.
' NOTE(review): fuse access is performed with cogid using fuse_read as D; what
' that encoding does is taken from the line comments below - confirm against the
' hardware documentation for this silicon revision.
'
		decod ptra,#11			'read 256 fuses into $800..$81F

		rep	@.r,#256
		cogid	fuse_read	wc	'set fuse number
		cogid	fuse_read	wc	'read fuse
		rcr	x,#1			'save fuse
		add	fuse_read,#1		'next fuse
		test	fuse_read,#$1F	wz	'every 32 fuses, write to hub
	if_z	wrlong	x,ptra++
.r
		cogid	#0		wc	'disable fuses
'
'
' Attempt to boot from serial
'
' Handshake with a serial host on rx_pin/tx_pin before any code is accepted:
'   1. Drive tx_pin high as an output.
'   2. Time two low pulses on rx and set threshold to their average, so that
'      rx_bit can classify later pulses as shorter (c=1) or not (c=0).
'   3. Receive 250 bits and check each against the local lfsr sequence; the
'      first mismatch abandons serial and jumps to boot_flash.
'   4. Send 250 further lfsr bits followed by 8 bits produced by loading id into
'      lfsr. Each outgoing bit is paced by four rx edges: tx low, tx = bit, tx high.
' The lfsr is advanced by 'test lfsr,#$B2 wc' followed by 'rcl lfsr,#1', i.e. the
' c produced by the test is shifted into bit 0.
' Any rx timeout inside wait_rx also ends up in boot_flash.
' On success, control jumps to load. Nothing between the last 'test lfsr,#$01 wz'
' and that jump writes z, so load sees the z of the final bit sent (z=1 per the
' line comment below), which selects serial mode there.
'
		setb	outb,#tx_pin		'make tx high output
		setb	dirb,#tx_pin

		call	#rx_bit			'measure low rx calibration pulses (host $F9 -> %1..010011111..)
		mov	threshold,delta		'and calculate threshold
		call	#rx_bit			'(any timeout results in flash boot)
		add	threshold,delta
		shr	threshold,#1

		mov	count,#250		'ready to receive/verify 250 lfsr bits
.lfsrin		call	#rx_bit			'receive bit ($FE/$FF) into c
		test	lfsr,#$01	wz	'get lfsr bit into nz
   if_c_eq_z	jmp	#boot_flash		'if mismatch, boot from flash
		test	lfsr,#$B2	wc	'advance lfsr
		rcl	lfsr,#1
		djnz	count,#.lfsrin		'loop for next bit in

		mov	count,#250+8		'ready to transmit 250 lfsr bits + 8 version bits
.lfsrout	cmp	count,#8	wz	'if last 8 bits, set lfsr so that version will be output
	if_z	mov	lfsr,#id		'id results in version being sent
		test	lfsr,#$01	wz	'get lfsr/version bit into nz, z=1 on last iteration
		call	#wait_rx		'wait for rx low (convey incoming $F9 on rx_pin to $FE/$FF on tx_pin)
		clrb	outb,#tx_pin		'make tx low
		call	#wait_rx		'wait for rx high
		setbnz	outb,#tx_pin		'make tx lfsr/version bit
		call	#wait_rx		'wait for rx low
		setb	outb,#tx_pin		'make tx high
		call	#wait_rx		'wait for rx high
		test	lfsr,#$B2	wc	'advance lfsr
		rcl	lfsr,#1
		djnz	count,#.lfsrout		'loop for next bit out

		jmp	#load			'serial handshake done, attempt to load from serial (z=1)
'
'
' Receive bit (c) - compare incoming pulse to threshold
'
' Waits for one low pulse on rx (two wait_rx calls) and times it with getct.
' Out:    delta = time between the two edges, in counter ticks
'         c     = result of 'cmp delta,threshold wc' (set when delta is below threshold)
' Uses:   y here, plus x and edge inside wait_rx
' z is not written here or in wait_rx, so a caller can keep a mode flag in z
' across the call.
' Does not return if wait_rx times out (wait_rx then continues into boot_flash).
'
rx_bit		call	#wait_rx		'wait for rx low
		getct	y			'get time

		call	#wait_rx		'wait for rx high
		getct	delta			'get time

		sub	delta,y			'compare time delta to threshold
		cmp	delta,threshold	wc

		ret
'
'
' Wait for rx low/high - if timeout, attempt to boot from flash
'
' Arms the edge event described by the edge register, then waits for it with a
' deadline of (current counter + timeout) passed through setq.
' After the wait, the two bits above the pin number in edge are toggled
' (%10 <-> %01), so successive calls alternate between the two edge settings,
' matching the callers' "wait for rx low" / "wait for rx high" sequence.
' Out:    c from waitedg: c=0 -> edge seen, routine returns; c=1 -> timed out
' Uses:   x; modifies edge
' On timeout there is no return: execution continues straight into boot_flash,
' so boot_flash must stay immediately after this routine.
'
wait_rx		setedg	edge			'ready edge

		getct	x			'wait for rx edge with timeout
		add	x,timeout
		setq	x
		waitedg			wc

		xor	edge,#%11_000000	'toggle edge

	if_nc	ret				'return if not timeout (boot_flash follows)
'
'
' Attempt to boot from flash
'
' Reached by jump on an lfsr mismatch, or by fall-through from wait_rx on timeout.
' Sends four 32-bit commands to the SPI flash, one per .cmd pass:
'   count = 4,3,2 : 32 one-bits each ($FF_FF_FF_FF, reset)
'   count = 1     : spi_read ($03_00_00_00, read from 0), rotated out via rol wc
' spi_cs is driven high and then low before each command and is left low after
' the last one, so the read data can be clocked in by the load code that follows.
' spi_read is rotated 32 times in total, which restores its original value.
' Exits by falling through into load with z=0 (from the wc,wz rol of the
' non-zero spi_read value), which selects flash mode there.
' Uses:   count; modifies spi_read, outb, dirb
'
boot_flash	mov	count,#4		'ready for 3 resets and 1 read command

.cmd		setb	outb,#spi_cs		'spi_cs high
		or	dirb,dirb_flash

		clrb	outb,#spi_cs		'spi_cs low

		rep	@.r,#32			'ready for 32 command bits
		cmpr	count,#1	wc	'first three commands = $FF_FF_FF_FF (reset)
	if_nc	rol	spi_read,#1	wc,wz	'last command = $03_00_00_00 (read from 0), z=0
		setbc	outb,#spi_di
		setb	outb,#spi_ck		'cycle spi_ck
		clrb	outb,#spi_ck
.r
		djnz	count,#.cmd		'loop for next spi command
'
'
' Load from serial (z=1) or flash (z=0)
'
' Receives $200 longs, 32 bits per long, shifting each bit into register z with
' rcl (so the first bit received ends up as the top bit), and streams the longs
' to hub RAM from address 0 through wrfast/wflong.
' The z flag set by the caller selects the bit source on every pass:
'   z=1 : rx_bit supplies the bit in c (pulse-width decoded serial)
'   z=0 : spi_do is sampled into c, then spi_ck is driven high and low
' No instruction in either path writes the z flag, so the mode holds for the
' whole load. In serial mode an rx timeout leaves through wait_rx into
' boot_flash, which restarts the load from flash.
' Uses:   count (long counter), i (bit counter), register z (shift register)
' Falls through into the HMAC authentication code.
'
load		wrfast	#0,#0			'load loader into $000..$7DF, HMAC into $7E0..$7FF

		decod	count,#9		'ready to input $200 longs
.long		mov	i,#32			'ready to input 32 data bits

.bit	if_z	call	#rx_bit			'input serial bit (serial mode)
	if_nz	testb	inb,#spi_do	wc	'input spi_do (flash mode)
	if_nz	setb	outb,#spi_ck		'high spi_ck (flash mode)
	if_nz	clrb	outb,#spi_ck		'low spi_ck (flash mode)
		rcl	z,#1			'shift bit into long
		djnz	i,#.bit			'loop, adequate time for next flash bit

		wflong	z			'store long in hub ram
		djnz	count,#.long		'loop for next long
'
'
' Authenticate loader HMAC signature
'
' $000..$7DF  = loader				($1F8 longs)
' $7E0..$7FF  = loader HMAC signature		($008 longs)
' $800..$81F  = fuses, first half are HMAC key	($008 longs)
' hashx[0..7] = proper HMAC signature (endian)	($008 longs)
'
' hmac leaves the computed signature in hashx[0..7] and the read FIFO positioned
' at $7E0. The eight stored signature longs are then read with rflong,
' byte-reversed with movbyts, and compared one at a time with hashx[0..7].
' The cmp at .hash is self-modifying: 'add .hash,#1' bumps its source field on
' every pass so that it steps through hashx[0..7].
'   first mismatch : clock is set to rc slow and cog0 is stopped (no boot)
'   all 8 match    : cog0 is relaunched with the loader via coginit #0,#0
' NOTE(review): the comparison stops at the first differing long, so its duration
' depends on how many leading longs match - confirm this is acceptable for the
' intended threat model.
'
		call	#hmac			'compute proper HMAC signature, rdfast at $7E0

		rep	@.r,#8			'compare HMAC signatures
		rflong	x			'get loader HMAC long
		movbyts	x,#%00_01_10_11		'do endian reversal before comparison
.hash		cmp	x,hashx		wz	'compare to proper HMAC long
	if_nz	clkset	#%0000_00_01		'if failed, set clock to rc slow
	if_nz	cogstop	#0			'if failed, stop cog0
		add	.hash,#1		'point to next proper HMAC long
.r
		coginit	#0,#0			'loader authenticated, relaunch cog0 with loader
'
'
' Booter constants
'
' Initialised longs used by the boot code. fuse_read, lfsr, edge and spi_read are
' modified in place at run time (by add, rcl, xor and rol respectively).
' NOTE(review): dirb_flash shifts by 58 and the setb/clrb/testb instructions use
' pin numbers 58..63 against the 32-bit dirb/outb/inb registers; this presumably
' relies on shift counts and bit indices being taken modulo 32 - confirm with the
' assembler and instruction documentation.
'
fuse_read	long	$200			'(becomes $300)
lfsr		long	"P"
edge		long	%10_000000 | rx_pin
timeout		long	20_000_000 / 1000 * 150	'150ms @20MHz (rcfast)
dirb_flash	long	%1110 << 58		'ones at pins 59..61 (spi_di, spi_ck, spi_cs), zero at pin 58 (spi_do)
spi_read	long	$03_00_00_00
'
'
'******************
'*  SHA-256/HMAC  *
'******************
'
' Start HMAC
'
' hmac - compute the HMAC (SHA-256) of the loader; result is left in hashx[0..7].
' This first part:
'   - calls init_hash to reset hash[0..7] and bytes
'   - hashes one full 64-byte block consisting of 16 key bytes read from hub
'     $800 upward followed by $00 fill bytes, every byte xor'ed with $36
'   - leaves .ipad when hash_byte returns z=1, i.e. when the block was completed
'   - converts the block still held in w[0..15] from key^$36 to key^$5C by
'     xor'ing each long with opad ($36363636 ^ $5C5C5C5C), storing the results in
'     opad_key[0..15] for the outer hash
' NOTE(review): the alti pattern %111_111_000 is assumed to step the D field of i
' through w[] for the xor's destination and the R field through opad_key[] for
' its result, as set up by the setd/setr above - confirm against ALTI documentation.
' Execution continues into "Hash loader" and "End HMAC"; the ret that ends the
' call to hmac is the one at the end of end_hash/init_hash.
'
hmac		call	#init_hash		'init hash

		decod	ptra,#11		'begin HMAC using key at $800..$80F
.ipad		cmp	bytes,#16	wc
	if_c	rdbyte	x,ptra++		'get and hash ipad key (full block)
	if_nc	mov	x,#$00			'after key bytes, hash $00's to fill block
		xor	x,#$36			'xor bytes with ipad ($36)
		call	#hash_byte		'(last iteration triggers hash_block, z=1)
	if_nz	jmp	#.ipad

		setd	i,#w			'save opad key
		setr	i,#opad_key
		rep	@.r,#16
		alti	i,#%111_111_000
		xor	0,opad			'xor bytes with opad ($5C)
.r
'
'
' Hash loader
'
' Feeds the $7E0 loader bytes at hub $000..$7DF through hash_byte using the
' rdfast FIFO, then calls end_hash to pad and finish the inner hash.
' end_hash leaves the inner digest in hashx[0..7] and re-initialises hash[0..7]
' and bytes for the outer hash.
' After the loop the FIFO has consumed exactly $7E0 bytes; the code that calls
' hmac relies on this when it reads the stored signature with rflong.
' Uses:   count, x
'
		rdfast	#0,#0			'hash loader at $000..$7DF
		mov	count,##$7E0
.byte		rfbyte	x
		call	#hash_byte
		djnz	count,#.byte

		call	#end_hash		'end hash
'
'
' End HMAC
'
' Outer hash over (key^$5C block) followed by the inner digest:
'   - opad_key[0..15] is copied into w[0..15] by two consecutive copy8 calls and
'     hashed directly with hash_block (bytes is not advanced by this)
'   - hashx[0..7] (the inner digest) is copied into w[0..7] as the start of the
'     next block
'   - hash_byte's destination pointer is set to w+8 and bytes to 64+32, so that
'     end_hash pads right after those 32 bytes and encodes a 96-byte length
' There is no ret here: execution falls through into end_hash, whose ret returns
' to the caller of hmac with the final signature in hashx[0..7].
'
		sets	i,#opad_key		'get opad key into w[0..15] (full block)
		setd	i,#w
		call	#copy8
		call	#copy8

		call	#hash_block		'hash opad key

		sets	i,#hashx		'get hashx[0..7] into w[0..7]
		setd	i,#w
		call	#copy8

		setd	hash_byte,#w+8		'account for opad key and hashx bytes
		mov	bytes,#64+32		'(1-1/2 blocks, 1/2 block needs end_hash)
'
'
' End Hash - hash $80, any $00's needed to get to offset $38, then 8-byte length
'
' end_hash  - finish the current message:
'               1. length = bytes << 3 (message length in bits)
'               2. hash $80, then $00 bytes until (bytes & $3F) = $38
'               3. hash the 8-byte big-endian length: while bit 2 of bytes is
'                  clear (offsets $38..$3B) x is still $00; once it is set
'                  (offsets $3C..$3F) length is rotated left by 8 and its low
'                  byte is hashed, giving the four length bytes top byte first
'               4. the last length byte completes a block, so hash_byte runs
'                  hash_block and returns z=1, which ends .len
'               5. hash[0..7] is copied to hashx[0..7]
'             then execution falls through into init_hash.
' init_hash - (also called on its own) copy hash_init[0..7] into hash[0..7] and
'             clear bytes, ready for a new message.
' Only a 32-bit length is kept, so the upper four length bytes are always $00.
' Uses:   x, i, length, bytes; c and z flags
'
end_hash	mov	length,bytes		'get message length in bits
		shl	length,#3

		mov	x,#$80			'hash end-of-message byte ($80)
.fill		call	#hash_byte		'(may trigger hash_block)
		mov	x,bytes			'hash any $00's needed to get to offset $38
		and	x,#$3F
		cmp	x,#$38		wz
		mov	x,#$00
	if_nz	jmp	#.fill

.len		test	bytes,#$04	wc	'hash 8-byte length, big-endian
	if_c	rol	length,#8		'(hash four $00's, then four length bytes)
	if_c	mov	x,length
		call	#hash_byte		'(last iteration triggers hash_block)
	if_nz	jmp	#.len

		sets	i,#hash			'save hash[0..7] into hashx[0..7]
		setd	i,#hashx
		call	#copy8

init_hash	sets	i,#hash_init		'copy hash_init[0..7] into hash[0..7]
		setd	i,#hash
		call	#copy8

		mov	bytes,#0		'reset byte count

		ret
'
'
' Hash Byte - add byte to w[0..15] and hash block if full (z=1)
'
' In:     x = byte to add (byte 0 of x is used)
' Out:    z = 1 if this byte completed a 64-byte block (hash_block was run),
'             otherwise z = 0
'         bytes is incremented by 1
' The rolbyte at hash_byte is self-modifying: its destination field is the
' current w[] pointer. That field is incremented after every 4th byte and reset
' to w after every 64th. Code outside this routine also sets it directly
' (setd hash_byte,#w+8).
' The z from the second test is still valid after the call to hash_block because
' neither hash_block nor copy8 contains an instruction that writes flags.
'
hash_byte	rolbyte	w,x,#0			'store byte into w[0..15], big-endian

		add	bytes,#1		'increment byte count

		test	bytes,#$03	wz	'every 4th byte, increment w pointer
	if_z	alti	hash_byte,#%000_011_000

		test	bytes,#$3F	wz	'every 64th byte, reset w pointer
	if_z	setd	hash_byte,#w

	if_z	call	#hash_block		'every 64th byte, hash block

		ret
'
'
' Hash Block - first extend w[0..15] into w[16..63] to generate schedule
'
' Runs one SHA-256 block on the 16 longs in w[0..15] and adds the result into
' hash[0..7]. Three phases:
'   1. schedule : for i = 16..63, w[i] = s0(w[i-15]) + w[i-16] + w[i-7] + s1(w[i-2])
'   2. rounds   : a..h are loaded from hash[0..7], then 64 rounds use k[i] and w[i]
'   3. add-back : hash[0..7] += a..h
' Each pair of rotate-right terms is produced with a rol / xor / ror sequence on
' a single temporary (rotating left by the difference, xor'ing in the original,
' then rotating right by the larger amount).
' Both loops end with 'incmod i,#63' / 'tjnz i', so i wraps to 0 after 63 and the
' loop exits.
' Uses:   i, x, y, z, a..h, w[16..63]. w[0..15] is only read. No instruction in
'         this routine writes the c or z flags.
'
hash_block	mov	i,#16		'i = 16..63

.hashb		alts	i,#w-15		's0 = (w[i-15] -> 7) ^ (w[i-15] -> 18) ^ (w[i-15] >> 3)
		mov	x,0
		mov	y,x
		rol	y,#18-7
		xor	y,x
		ror	y,#18
		shr	x,#3
		xor	x,y

		alts	i,#w-2		's1 = (w[i-2] -> 17) ^ (w[i-2] -> 19) ^ (w[i-2] >> 10)
		mov	y,0
		mov	z,y
		rol	z,#19-17
		xor	z,y
		ror	z,#19
		shr	y,#10
		xor	y,z

		alts	i,#w-16		'w[i] = s0 + w[i-16]
		add	x,0
		alts	i,#w-7		'w[i] = s0 + w[i-16] + w[i-7]
		add	x,0
		altr	i,#w		'w[i] = s0 + w[i-16] + w[i-7] + s1
		add	x,y

		incmod	i,#63		'i++
		tjnz	i,#.hashb
'
'
' Load variables from hash
'
		sets	i,#hash		'copy hash[0..7] into a..h
		setd	i,#a
		call	#copy8
'
'
' Do 64 hash iterations on variables
'
		mov	i,#0		'i = 0..63

.hashi		mov	x,g		'ch = (e & f) ^ (!e & g)
		xor	x,f
		and	x,e
		xor	x,g

		mov	y,e		's1 = (e -> 6) ^ (e -> 11) ^ (e -> 25)
		rol	y,#11-6
		xor	y,e
		rol	y,#25-11
		xor	y,e
		ror	y,#25

		add	x,y		't1 = ch + s1
		alts	i,#k		't1 = ch + s1 + k[i]
		add	x,0
		alts	i,#w		't1 = ch + s1 + k[i] + w[i]
		add	x,0
		add	x,h		't1 = ch + s1 + k[i] + w[i] + h

		mov	y,c		'maj = (a & b) ^ (b & c) ^ (c & a)
		and	y,b
		or	y,a
		mov	h,c		'h is free from here on (already added into t1), used as scratch
		or	h,b
		and	y,h

		mov	h,a		's0 = (a -> 2) ^ (a -> 13) ^ (a -> 22)
		rol	h,#13-2
		xor	h,a
		rol	h,#22-13
		xor	h,a
		ror	h,#22

		add	y,h		't2 = maj + s0

		mov	h,g		'h = g
		mov	g,f		'g = f
		mov	f,e		'f = e
		mov	e,d		'e = d
		mov	d,c		'd = c
		mov	c,b		'c = b
		mov	b,a		'b = a

		add	e,x		'e = e + t1

		mov	a,x		'a = t1 + t2
		add	a,y

		incmod	i,#63		'i++
		tjnz	i,#.hashi
'
'
' Add variables back into hash
'
		sets	i,#a		'add a..h into hash[0..7]
		setd	i,#hash
		rep	@.r,#8
		alti	i,#%000_111_111
		add	0,0
.r
		ret
'
'
' Copy 8 registers
'
' In:     i = source register address in its S field (callers use sets) and
'             destination register address in its D field (callers use setd)
' Copies 8 consecutive registers from the source to the destination.
' NOTE(review): callers invoke copy8 twice in a row to copy 16 registers, which
' implies that alti leaves both fields of i advanced by 8 on return - confirm
' against the ALTI documentation.
' No instruction here writes the c or z flags.
'
copy8		rep	@.r,#8
		alti	i,#%000_111_111
		mov	0,0
.r
		ret
'
'
' SHA-256/HMAC constants
'
' opad      - xor'ed onto the key^$36 longs to turn them into key^$5C
' hash_init - the 8 initial hash values, copied into hash[0..7] by init_hash
' k         - the 64 round constants, read as k[i] in the hash_block rounds
'
opad		long	$36363636 ^ $5C5C5C5C

hash_init	long	$6A09E667, $BB67AE85, $3C6EF372, $A54FF53A	'fractionals of square roots of primes 2..19
		long	$510E527F, $9B05688C, $1F83D9AB, $5BE0CD19

k		long	$428A2F98, $71374491, $B5C0FBCF, $E9B5DBA5	'fractionals of cube roots of primes 2..311
		long	$3956C25B, $59F111F1, $923F82A4, $AB1C5ED5
		long	$D807AA98, $12835B01, $243185BE, $550C7DC3
		long	$72BE5D74, $80DEB1FE, $9BDC06A7, $C19BF174
		long	$E49B69C1, $EFBE4786, $0FC19DC6, $240CA1CC
		long	$2DE92C6F, $4A7484AA, $5CB0A9DC, $76F988DA
		long	$983E5152, $A831C66D, $B00327C8, $BF597FC7
		long	$C6E00BF3, $D5A79147, $06CA6351, $14292967
		long	$27B70A85, $2E1B2138, $4D2C6DFC, $53380D13
		long	$650A7354, $766A0ABB, $81C2C92E, $92722C85
		long	$A2BFE8A1, $A81A664B, $C24B8B70, $C76C51A3
		long	$D192E819, $D6990624, $F40E3585, $106AA070
		long	$19A4C116, $1E376C08, $2748774C, $34B0BCB5
		long	$391C0CB3, $4ED8AA4A, $5B9CCA4F, $682E6FF3
		long	$748F82EE, $78A5636F, $84C87814, $8CC70208
		long	$90BEFFFA, $A4506CEB, $BEF9A3F7, $C67178F2
'
'
' variables
'
' Uninitialised registers reserved with res. Layout notes:
'   - opad_key, hash, hashx and w are arrays accessed through alts/altr/alti and
'     copy8, so each must remain a contiguous run of the stated length
'   - a..h must stay contiguous and in this order: copy8 and the add-back loop in
'     hash_block treat them as an 8-register array starting at a
'   - count, i, x and z are shared by the booter and the SHA-256/HMAC code
'
vars				'start

delta		res	1	'booter
threshold	res	1

bytes		res	1	'SHA-256/HMAC
count		res	1
length		res	1

opad_key	res	16

hash		res	8
hashx		res	8

w		res	64

a		res	1
b		res	1
c		res	1
d		res	1
e		res	1
f		res	1
g		res	1
h		res	1

i		res	1
x		res	1
y		res	1
z		res	1

I may add a text-only loader, in case serial and flash fail to load and/or authenticate. For now, this program takes $12E longs out of a possible $1F8. That's 60% full.

Peter Jakacki · 2016-07-17 01:39

sweet, can't wait to try it.

potatohead · 2016-07-17 03:23

Hope text loader makes the cut. Good work Chip!

ozpropdev · 2016-07-17 04:40

Looks like things are moving along nicely.

jmg · 2016-07-17 07:51

cgracey wrote: »

I've got the Prop2 ROM code all optimized now.
..
I may add a text-only loader, in case serial and flash fail to load and/or authenticate. For now, this program takes $12E longs out of a possible $1F8. That's 60% full.

Looks good, any ideas what size a SD or even USB loader needs ?

cgracey · 2016-07-17 07:53

jmg wrote: »

cgracey wrote: »

I've got the Prop2 ROM code all optimized now.
..
I may add a text-only loader, in case serial and flash fail to load and/or authenticate. For now, this program takes $12E longs out of a possible $1F8. That's 60% full.

Looks good, any ideas what size a SD or even USB loader needs ?

No. SD is possibly simple if we look for a starting string.

Seairth · 2016-07-17 17:56

If multiple boot methods are supported (aside from serial), it seems unlikely that an application would use more than one. If there are any spare configuration fuses, I suggest using that to specify the mode. That way, by default, you'd get SPI, but could switch to I2C, QSPI, HyperBus, or whatever.

David Betz · 2016-07-17 18:37

cgracey wrote: »

I may add a text-only loader, in case serial and flash fail to load and/or authenticate. For now, this program takes $12E longs out of a possible $1F8. That's 60% full.

What am I missing here? There is already a way to load over a serial link, right? Why do we need two ways of doing that? Won't that just slow down boot from SPI flash?

jmg · 2016-07-17 21:11

David Betz wrote: »

... Won't that just slow down boot from SPI flash?

Certainly, SPI boot needs to be without long timeouts in front.

Is there not a way to fast-skip UART boot, e.g. by tying RXD (or some other pin) low with a pull-down?

Tubular · 2016-07-17 21:20

Isn't the UART boot last, in which case SPI isn't delayed anyway?

I think uart boot is a good idea, it lowers the barriers. Lets you load over radio devices and is easy to debug. Could even load from a 4D systems screen, or an Osborne 1. Plus if done from a terminal the prop can immediately send data back

David Betz · 2016-07-17 22:12

Tubular wrote: »

Isn't the UART boot last, in which case SPI isn't delayed anyway?

I think uart boot is a good idea, it lowers the barriers. Lets you load over radio devices and is easy to debug. Could even load from a 4D systems screen, or an Osborne 1. Plus if done from a terminal the prop can immediately send data back

I still don't understand. We've always had UART boot even in P1. Are you suggesting that we change the funky loader protocol from P1 into a text-based protocol for P2? I don't see a reason to have two UART protocols.

Cluso99 · 2016-07-17 22:17

I have been thinking about the boot over the past few days and have some comments...

FWIW the P1 tries the serial port first. If the RXD(SI) pin P31 is low? then serial is skipped, otherwise it waits for a period of time for a serial handshake sequence before moving on. Next, I2C EEPROM is tried. If there is no eeprom then the P1 quits (effectively locks until reset). Otherwise, the whole 32KB is loaded from EEPROM into Hub RAM, the lower few longs are verified, and if correct the cog runs the spin program.

P2 Encryption

If the P2 has the encryption fuses non-blank (because I don't know if they are 1's or 0's when blank), then a key has been programmed. In this case, IMHO, the P2 should only run from SPI (Flash or perhaps SD). IMHO it should NOT accept download serial code. This can be done by the code in Flash or SD by the user. Remember, we don't want to help any brute force attacks. We want this to be totally under user control.
Also, the user is likely to want a fast load/run P2, so no delays waiting for serial, etc.

Non-Encryption (default)

If the P2 encryption fuses are blank, then the boot process should perform..
* serial (P63/P62 as RXD(SI)/TXD(SO))
* SPI Flash
* (optionally) SPI SD
* (optionally) simple monitor/loader

Miscellaneous

You may recall I suggested some years ago, that we could use a single pin with various values of pullup/pulldowns. This could accomplish a fast boot without waiting for serial. Various pullup/pulldown values could select the boot order/sequence.

It has also been suggested that Quad SPI be used. This means that the SPI Flash pins ought to be on different pins to those suggested as P61-P58. I can add a drawing later (no time currently).

We can determine if an SD card is present by testing for a pullup on the nCS line. (Peter J has shown that a physical pullup is not necessary: the SD card, when present, presents a pullup (high) value on the line, and when the SD card is absent the line is open-circuit, which can be tested.)

Here are my thoughts...

P63 = RXD(SI) serial (or USB)
P62 = TXD(SO) serial (or USB)
P61 = nCE - SD
P60 = nCS - Flash
P59 = D3/nHLD
P58 = D2/nWP
P57 = D1/SO
P56 = D0/SI

Postedit: Forgot the CLK pin!!! see my next post

The above permits the SD and Flash to share the D0-D3 pins, permits Quad SPI operation.

P?? = Optional pullup/pulldown - various values for boot options
It depends on how well we can detect the value. ie can we detect 1K/5K/10K vs the weak pullup of an external flash chip. Perhaps we can share the pin with the nCS or the nCE or else on one of the D0-D3 pins with pullup or pulldown. Need more discussion.

Perhaps it would be sufficient to detect the nCS (Flash) being pulled low (=>10K) for not being present, and therefore skip booting from Flash.

SD Card

I suggest we go with Peter J's method of checking the MBR sector for a string, and if found, use the sector number it points to for the sector of the $200 byte/long read for the boot code.

Peter Jakacki · 2016-07-17 23:16

If you can read one sector then you can read as many as is specified, so sector zero has the signature, a starting sector, and a contiguous sector count.

I really can't see the need for QSPI when we are running the SD in SPI mode and any Flash device is really only a boot device and if there were any need to store files on there then they are going to be pretty darn small so what's the need for QSPI here? It seems that we get confused as to what our actual requirements are because we are sidetracked by what we might be able to do.

The original Flash SPI pinout kept pin wastage to a minimum:

  	sfcs = 61
  	sfck = 60
  	sfdi = 59
  	sfdo = 58

If now we stick with that then we could make P57 the SD chip select. I also use P60 for an I2C SCL as well and although I allow for EEPROM in my restore routines I don't see a real need for it for boot.

It seems to me that we want low latency boot so waiting around for the serial port is not on, the PC is the one that should expect to wait after a reset and the P2 needs to check for basics first, such as the SPI Flash as that is nice and simple to init and read, then the SD card which can use the SD chip select as a card detect to quickly skip that if no card is present. If a card is present then it has to go through a "relatively" lengthy initialization before reading sector zero but if the boot signature fails then on to serial etc.

btw, the simple serial loader is only a backup for when the standard serial loader fails as I understand it. I like the idea and yes, we can use it over wireless links too.

David Betz · 2016-07-18 00:40

Peter Jakacki wrote: »

btw, the simple serial loader is only a backup for when the standard serial loader fails as I understand it. I like the idea and yes, we can use it over wireless links too.

Why would the standard serial loader fail in a way that wouldn't also cause the simple serial loader to fail? The main problem with the P1 standard loader is that it has very short timeouts so it is hard to manage from a PC where you can't control timing exactly. Otherwise, it is dead simple once you understand the protocol. If it fails, I don't see much hope that a "simple loader" will do any better.

Peter Jakacki · 2016-07-18 00:42

The simple loader doesn't do "any better" but allows pasting a simple hex file from anything so it is not PC or timing dependent. But nonetheless, why oppose options when you never lose, only gain?

Cluso99 · 2016-07-18 00:53

Peter,
The reason I suggested the pinout for Quad SPI is that if there is no SD then quad SPI can be used. The same D0/D1 pins can be used for SD, and D0-D3 if using the SD in the fast mode (in case it is done later).

But I forgot to allow for the CLK pin in my suggested pinout. What I thought was a bad idea is to require the D0/D1 pins to need to be reconfigured to a new location if quad mode was used. Quad mode should have the D0-D3 pins on a nibble boundary, hence the suggestion of P56-59=D0-D3.

Yes, I agree with the SD format, that we can either read a fixed number of sectors or take the number of sectors from a count in the MBR sector. And the way suggested is not format restricted, as a few of us have agreed in the past.

Here is a thought. Normally I would invert the nCS output pin for selecting my SRAM vs SD. We could do the same instead for selecting FLASH or SD. However, if only one of these were present as an either/or, we could use the pin for nCS for either FLASH or SD, depending on which was fitted.

I would expect that we could interrogate for FLASH and if an SD was connected it would fail to respond since it would not recognise the protocol. We can verify that this does not cause a problem with the SD.

Here is a revised suggestion..

 P63 = RXD(SI) serial (or USB)
 P62 = TXD(SO) serial (or USB)
 P61 = nCS (SD or FLASH, only one fitted. If both fitted, SD will be inverted)
 P60 = CLK 
 P59 = D3/nHLD
 P58 = D2/nWP
 P57 = D1/SO
 P56 = D0/SI

As for serial, it is a requirement that the serial be tried before Flash. This is because if you burn a bad program into flash, you can override it and reprogram it. If you boot into flash first, you can lockout serial downloading!

David Betz · 2016-07-18 00:55

Peter Jakacki wrote: »

The simple loader doesn't do "any better" but allows pasting a simple hex file from anything so it is not PC or timing dependent. But nonetheless, why oppose options when you never lose, only gain?

I was hoping that Chip would put a tiny Forth interpreter in ROM in place of this simple loader. Can you make one that will fit? :-)

Tubular · 2016-07-18 00:55

I guess I just prefer ASCII over binary. But also the relaxed timing is better for radio ( & satellite? ) links

However it timing out after a minute means its still timing dependent, just on a relaxed scale

Peter Jakacki · 2016-07-18 00:59

David Betz wrote: »

Peter Jakacki wrote: »

The simple loader doesn't do "any better" but allows pasting a simple hex file from anything so it is not PC or timing dependent. But nonetheless, why oppose options when you never lose, only gain?

I was hoping that Chip would put a tiny Forth interpreter in ROM in place of this simple loader. Can you make one that will fit? :-)

lol

but that would be "Open Firmware / OpenBoot" or else some might end up labeling it "a Forth Chip" and not giving it a second look!!!

Being based upon an interactive programming language, Open Firmware can be used to efficiently test and bring up new hardware. It allows drivers to be written and tested interactively. Operational video and mouse drivers are the only prerequisite for a graphical interface suitable for end-user diagnostics. Indeed, Apple shipped such a diagnostic "operating system" in many Power Macintoshes.

David Betz · 2016-07-18 01:01

Tubular wrote: »

I guess I just prefer ASCII over binary. But also the relaxed timing is better for radio ( & satellite? ) links

However it timing out after a minute means its still timing dependent, just on a relaxed scale

It is my understanding that the reason for the strange binary loader protocol is so it will work with the default RC clock. Will this simple serial protocol require a crystal?

Cluso99 · 2016-07-18 01:06

FWIW I only see the monitor/simple loader as a simple way to load the P2 via simple serial (does not require a PropPlug or any special reset circuitry) when Flash/SD does not exist or is unprogrammed. Basically it permits operation of the P2 with minimal hardware.

Once Flash or SD exists, then it is a simple matter to have a more thorough monitor/loader burnt into Flash or SD.

The debugger/monitor that I have been working on I thought could have been preloaded from ROM into hub so that any program could simply call it. The debugger/monitor can still be added to your code as an object, and reside in hub.

Peter Jakacki · 2016-07-18 01:08

David Betz wrote: »

Tubular wrote: »

I guess I just prefer ASCII over binary. But also the relaxed timing is better for radio ( & satellite? ) links

However it timing out after a minute means its still timing dependent, just on a relaxed scale

It is my understanding that the reason for the strange binary loader protocol is so it will work with the default RC clock. Will this simple serial protocol require a crystal?

It recalibrates on the space character that is used at the start and in between hex bytes so it is constantly adjusting with every byte. Not as efficient or as fast as the binary loader but guaranteed to work with just about anything anywhere, just paste your hex file through your Bluetooth terminal on your smartphone to program the P2.

David Betz · 2016-07-18 01:15

Peter Jakacki wrote: »

David Betz wrote: »

Tubular wrote: »

I guess I just prefer ASCII over binary. But also the relaxed timing is better for radio ( & satellite? ) links

However it timing out after a minute means its still timing dependent, just on a relaxed scale

It is my understanding that the reason for the strange binary loader protocol is so it will work with the default RC clock. Will this simple serial protocol require a crystal?

It recalibrates on the space character that is used at the start and in between hex bytes so it is constantly adjusting with every byte. Not as efficient or as fast as the binary loader but guaranteed to work with just about anything anywhere, just paste your hex file through your Bluetooth terminal on your smartphone to program the P2.

Wait a minute. You're saying the binary protocol is faster? How can that be? It only sends between 1-3 bits per byte. What baud rate will the simple loader allow compared with the standard loader?

jmg · 2016-07-18 01:25

Peter Jakacki wrote: »

I really can't see the need for QSPI when we are running the SD in SPI mode and any Flash device is really only a boot device and if there were any need to store files on there then they are going to be pretty darn small so what's the need for QSPI here?

Quite a lot of P2 designs will be SPI Flash only, and those are quite cheap.
I think these days, 2MBytes sub 20c/1k TSSOP8

Boot ROM itself does not need to be QuadSPI, but it is important that it is QuadSPI tolerant.
That means the extra Data pins, which are IIRC Hold and WE, must be legally defined during SPI boot, and SPI boot does need to reset any Quad mode.
(ie a Reset may arrive from a watchdog, while the user has QuadSPI mode enabled - this needs to exit Quad, then load, reliably)

Because Boot is size-definable, the stage 1 (ROM) can be Single SPI, then flip to QuadSPI.

Peter Jakacki · 2016-07-18 01:27

David Betz wrote: »

Wait a minute. You're saying the binary protocol is faster? How can that be? It only sends between 1-3 bits per byte. What baud rate will the simple loader allow compared with the standard loader?

Maybe that's right then, I was really just concentrating on the text loader advantages as a fall-back to the binary loader.

Re boot sequence
While it is true we need the serial bootloader to override internal boot I would hate for the internal boot to be delayed because of big timeouts. If serial activity is not detected within milliseconds then I think it should move on to the Flash and SD.

Re boot pins
I was thinking too that the chip select could be used for either the Flash or the SD as Cluso99 mentioned but I wouldn't try to run both even with an inverter. Either a system has a Flash and no SD or it uses the SD for boot as it has no Flash. Otherwise the bootloader should expect an alternate chip select for the SD if one is not detected on the Flash line. But where oh where is this 4-bit SD mode? Isn't this a proprietary licensed Secure Digital mode? That's why we all use SPI mode and I'd rather not waste extra pins on QSPI just to make it boot a fraction faster when we already have boot sequence delays etc that erode any speed gains.

David Betz · 2016-07-18 01:31

Peter Jakacki wrote: »

David Betz wrote: »

Wait a minute. You're saying the binary protocol is faster? How can that be? It only sends between 1-3 bits per byte. What baud rate will the simple loader allow compared with the standard loader?

Maybe that's right then, I was really just concentrating on the text loader advantages as a fall-back to the binary loader.

Sorry, I was wrong. Jeff Martin's loader that we're using on the ESP can pack up to 5 bits per byte in some cases. That's still less than 8 though.

Also, you didn't comment on my request that you provide a tiny Forth interpreter to go in the ROM? Certainly, squirting some Forth over the serial port would allow you to effect a simple loader and would also provide a way to interactively poke at the hardware without having to load anything else. I can't remember how big Chip said the ROM would be but would it be completely impossible to provide an interactive programmable interface instead of just a dumb loader?

jmg · 2016-07-18 02:03

Peter Jakacki wrote: »

That's why we all use SPI mode and I'd rather not waste extra pins on QSPI just to make it boot a fraction faster when we already have boot sequence delays etc that erode any speed gains.

See above - you do not have to waste extra pins, but I think the ROM loader needs to at least

a) define those extra pins during boot, so it can Boot from QuadSPI, used as Simple SPI.
b) Issue enough reset/init commands to exit any possible Quad MODE

It's hard to buy a Flash device these days that is not QuadSPI, and certainly the cheapest ones are QuadSPI, so those cheapest parts need to be tolerated, in all their possible connects.

eg Someone may boot entirely in 1b SPI, but use the majority of their 2MBytes for Fonts, that they read in using QuadSPI mode.

Peter Jakacki · 2016-07-18 02:18

David Betz wrote: »

Peter Jakacki wrote: »

David Betz wrote: »

Wait a minute. You're saying the binary protocol is faster? How can that be? It only sends between 1-3 bits per byte. What baud rate will the simple loader allow compared with the standard loader?

Maybe that's right then, I was really just concentrating on the text loader advantages as a fall-back to the binary loader.

Sorry, I was wrong. Jeff Martin's loader that we're using on the ESP can pack up to 5 bits per byte in some cases. That's still less than 8 though.

Also, you didn't comment on my request that you provide a tiny Forth interpreter to go in the ROM? Certainly, squirting some Forth over the serial port would allow you to effect a simple loader and would also provide a way to interactively poke at the hardware without having to load anything else. I can't remember how big Chip said the ROM would be but would it be completely impossible to provide an interactive programmable interface instead of just a dumb loader?

Text loader needs three bytes for every 8-bits so 33% utilization vs binary loader typical of maybe 50%.

I replied to your request for a tiny Forth almost as if you were being tongue-in-cheek and responding in like manner. I know I would love to have a tiny Forth interpreter in ROM but there seems to be quite a number who are not just "not interested" but even anti-Forth. It's a tool that as Open Firmware has worked well for many computer makers and EFI is just the bloated and conventionalized version of that which we find in any new PC. I just really doubt I would have any support at all for such a boot Forth.

David Betz · 2016-07-18 02:23

Peter Jakacki wrote: »

David Betz wrote: »

Peter Jakacki wrote: »

David Betz wrote: »

Wait a minute. You're saying the binary protocol is faster? How can that be? It only sends between 1-3 bits per byte. What baud rate will the simple loader allow compared with the standard loader?

Maybe that's right then, I was really just concentrating on the text loader advantages as a fall-back to the binary loader.

Sorry, I was wrong. Jeff Martin's loader that we're using on the ESP can pack up to 5 bits per byte in some cases. That's still less than 8 though.

Also, you didn't comment on my request that you provide a tiny Forth interpreter to go in the ROM? Certainly, squirting some Forth over the serial port would allow you to effect a simple loader and would also provide a way to interactively poke at the hardware without having to load anything else. I can't remember how big Chip said the ROM would be but would it be completely impossible to provide an interactive programmable interface instead of just a dumb loader?

The text loader needs three bytes for every 8 bits, so that's 33% utilization versus maybe 50% typical for the binary loader.

Well, I guess you don't really need spaces between the bytes. That's where I got 50% utilization.

I replied to your request for a tiny Forth almost as if you were being tongue-in-cheek and responding in like manner. I know I would love to have a tiny Forth interpreter in ROM but there seems to be quite a number who are not just "not interested" but even anti-Forth. It's a tool that, as Open Firmware, has worked well for many computer makers, and EFI is just the bloated and conventionalized version of it that we find in any new PC. I just really doubt I would have any support at all for such a boot Forth.

I was actually serious. I put the smiley face after my question because I wasn't sure it was actually possible given the limited amount of ROM space available. On the other hand, I know you are able to squeeze a lot more out of a limited amount of program space than I would have thought possible. I would welcome a tiny Forth in ROM. I never liked the idea of a monitor because it wasn't programmable. Forth certainly is.

Peter Jakacki · 2016-07-18 02:27

jmg wrote: »

Peter Jakacki wrote: »

That's why we all use SPI mode and I'd rather not waste extra pins on QSPI just to make it boot a fraction faster when we already have boot sequence delays etc that erode any speed gains.

See above - you do not have to waste extra pins, but I think the ROM loader needs to at least:

a) define those extra pins during boot, so it can Boot from QuadSPI, used as Simple SPI.
b) Issue enough reset/init commands to exit any possible Quad MODE

It's hard to buy a Flash device these days that is not QuadSPI, and certainly the cheapest ones are QuadSPI, so those cheapest parts need to be tolerated, in all their possible connects.

eg Someone may boot entirely in 1b SPI, but use the majority of their 2MBytes for Fonts, that they read in using QuadSPI mode.

I'm just interested in using these P2 chips in production, not just playing with them to see what I can get out of them. I see all this HyperRAM and QSPI etc. as nice, but it should not drive the base requirements. I see nothing wrong with QSPI, but come on, I wouldn't expect to be loading all 512kB into RAM or to have such huge fonts that they require megabytes of Flash when usable video memory is going to be less than 512k of hub RAM. QSPI-capable memory easily supports dual and single SPI, and IIRC these parts require a special command to put them into QSPI mode anyway; the default is SPI.

If your desire is more about pushing the limits, then you can, but dedicating boot pins on that basis is not practical. However, whatever Chip does, he does.

Cluso99 · 2016-07-18 02:37

I was trying to place the Quad SPI pins so they do not require moving to get Quad SPI to work too. That was not meant for booting although that would be entirely possible given the initial boot sequence Chip is proposing would only boot a small section of code that would be used to take over and possibly load more which could be quad SPI mode. The D0-D3 pins need to be nibble based to work effectively.

Likewise, if the SD used the same D0-D3 pins, it may be possible to use the SD card in the licensed mode. I thought there were some people clean-room deciphering the code for free use. Anyway, it just makes it compatible with the SPI Quad mode.

Hopefully we could boot from either: try SPI first and, if no SPI Flash is found, try SD. If the boot code supports SD (which I sincerely hope it does) then I would expect anyone building a board with SD would not be likely to put on a SPI Flash chip since it would not be required.

BTW I am not expecting Quad SPI to be the be-all and end-all, as many expect.

Prop2 ROM code

Comments