Prop2 FPGA files!!! - Updated 2 June 2018 - Final Version 32i

Rayman · 2016-10-03 11:09

Does version 12 also support 4-pin SPI?

ozpropdev · 2016-10-03 13:23

Chip
All flavours of V12 loaded Ok on their respective platforms.

cgracey · 2016-10-03 14:31

Rayman wrote: »

Does version 12 also support 4-pin SPI?

No, just 3-pin.

cgracey · 2016-10-03 14:32

ozpropdev wrote: »

Chip
All flavours of V12 loaded Ok on their respective platforms.

Thanks for checking all those.

cgracey · 2016-10-03 18:09

The forum software is not letting me upload .spin2 files, so I will post here the program that writes a signed blinker program into an attached SPI flash chip:

' Program SPI flash with HMAC-signed OUTB blinker program
' - Connect SPI flash with a pull-up on spi_cs (and on spi_ck if you want faster booting)
' - Blinks OUTB on boot-up
'
' Sequence performed by the code below:
'   1. erase the first $1000 bytes of the flash (command $20 at address 0)
'   2. program the first $400 bytes from pgmdata, one $100-byte page at a time
'   3. read the first $400 bytes back onto OUTA[7:0] forever, for viewing on a logic analyzer

CON

  spi_cs = 61     'flash chip select pin (driven high, then low, at the start of every command)
  spi_ck = 60     'flash clock pin (pulsed high, then low, once per data bit)
  spi_dq = 59     'this is both DI and DO tied together, make sure WPn and HOLDn are tied high

DAT


		org
'
'
' Init SPI pins
'
' spi_cs is set high before it is made an output; spi_ck is made an output as well.
' spi_dq is not touched here - spi_out switches it between output and input as needed.
'
		outh	#spi_cs

		dirh	#spi_cs
		dirh	#spi_ck
'
'
' Erase first $1000 bytes
'
' cmd_erase carries command $20 in its top byte and address 0 in the low 24 bits.
'
		call	#spi_wrena		'write enable

		mov	cmd,cmd_erase		'sector erase
		call	#spi_cmd32

		call	#spi_wait		'wait for completion
'
'
' Program first $400 bytes
'
' Outer loop (.program): one page-program command per $100 bytes, each preceded by a write enable.
' Inner loop (.byte): streams bytes from hub memory at ptra until the low 8 bits of adr wrap to 0.
' adr is the running flash byte address; it starts at 0 (see its declaration in the Data section).
'
		loc	ptra,#\pgmdata		'point to program data

.program	call	#spi_wrena		'write enable

		mov	cmd,cmd_program		'page program
		or	cmd,adr			'merge the current byte address into the low bits of the command
		call	#spi_cmd32

.byte		rdbyte	cmd,ptra++		'get byte

		mov	x,#8			'send byte
		shl	cmd,#24			'spi_out sends from the top of cmd, so move the byte to bits 31..24
		call	#spi_out

		add	adr,#1			'page done?
		test	adr,#$FF	wz	'Z is set when the low 8 bits of adr are all zero
	if_nz	jmp	#.byte

		call	#spi_wait		'wait for completion (this starts a new command, so spi_cs is pulsed high first)
'
' NOTE(review): per the heading above, this loop is meant to repeat until adr reaches $400 (bit 10 set),
' so it relies on 'testb ... wz' leaving Z set while the tested bit is still clear.
' TODO confirm the Z sense of TESTB for the tool/FPGA version this is assembled for.
'
		testb	adr,#10		wz	'another page?
	if_z	jmp	#.program
'
'
' Read data back to outa for viewing on logic analyzer (or reset to reboot new program)
'
' This section never exits: after every $400 bytes it issues a fresh read command at address 0
' (cmd_read has a zero address field) and pulses pin 8 so the analyzer can trigger on each pass.
'
		mov	dira,#$1FF		'pins 0..8 become outputs: 0..7 carry the data byte, 8 is the trigger

.read1k		mov	cmd,cmd_read		'start read
		call	#spi_cmd32

		outh	#8			'trigger signal
		outl	#8

		decod	y,#10			'y = 1 << 10 bytes per pass; read byte to outa
.read		call	#spi_in
		setbyte	outa,cmd,#0		'low byte of cmd (the byte just received) goes to byte 0 of outa
		djnz	y,#.read

		jmp	#.read1k		'loop
'
'
' SPI write enable
'
' Sends the single-byte command $06 to the flash through spi_cmd8.
' Called before the erase command and before every page-program command.
' Overwrites cmd and x (both are used by spi_cmd8/spi_out); the C flag is changed by spi_out.
'
spi_wrena	mov	cmd,#$06		'write enable
		call	#spi_cmd8

		ret
'
'
' SPI wait while busy
'
' Sends the single-byte command $05, then keeps clocking bytes in with spi_in and
' loops for as long as bit 0 of the received byte is set.
' ($05 is presumably the flash's read-status command with bit 0 as its busy flag -
'  confirm against the flash datasheet.)
' Overwrites cmd and x; changes the C flag. spi_cs is left low on return; the next
' spi_cmd32/spi_cmd8 call raises it again before starting its own command.
'
spi_wait	mov	cmd,#$05
		call	#spi_cmd8

.wait		call	#spi_in
		test	cmd,#$01	wc	'only bit 0 is masked in, so C follows bit 0 of the byte just read
	if_c	jmp	#.wait

		ret
'
'
' SPI command
'
' Entry points (all of them fall through into spi_out below and return from its 'ret'):
'   spi_cmd32 - send all 32 bits of cmd (command byte in bits 31..24 + 24-bit address)
'   spi_cmd8  - send only the low byte of cmd; it is first shifted up to bits 31..24
'   spi_cmd   - common part: x must already hold the bit count, cmd the MSB-justified data
'
' Every command starts by driving spi_cs high and then low, which also ends whatever
' transfer was previously left open.
'
spi_cmd32	mov	x,#32
		jmp	#spi_cmd

spi_cmd8	mov	x,#8
		shl	cmd,#24

spi_cmd		outh	#spi_cs
		outl	#spi_cs
'
'
' SPI long/byte out (x=bits, cmd=msbdata)
'
' Shifts x bits out of cmd, most significant bit first, one clock pulse per bit.
' Can also be called directly (the page-program loop does so) to continue a transfer
' without touching spi_cs.
' On return x is 0, cmd has been shifted left by the number of bits sent, spi_dq has been
' switched back to an input and the C flag is changed.
'
spi_out		dirh	#spi_dq			'make data output

.out		shl	cmd,#1		wc	'get bit to send
		outc	#spi_dq			'set data to bit
		outh	#spi_ck			'clock high
		cmp	x,#2		wc	'last bit?
	if_c	dirl	#spi_dq			'if last bit, make data input (released before the final falling clock edge)
		outl	#spi_ck			'clock low
		djnz	x,#.out			'loop to output bits

		ret
'
'
' SPI byte in (cmd)
'
' Clocks 8 bits in from spi_dq. The four instructions between 'rep' and the .in label are
' repeated 8 times; each pass rotates one sampled bit into the bottom of cmd, so the first
' bit received ends up in bit 7 of the low byte.
' cmd is not cleared first: its previous contents are shifted up by 8 bits and only the
' low byte holds the new data (callers use 'setbyte ...,#0' or test bit 0 only).
' Expects spi_dq to be an input already (spi_out leaves it that way). Changes the C flag.
'
spi_in		rep	@.in,#8			'ready to input a byte
		outh	#spi_ck			'clock high
		outl	#spi_ck			'clock low
		testin	#spi_dq		wc	'sample data bit ('testin' is from before 'outl')
		rcl	cmd,#1			'save data bit
.in
		ret
'
'
' Data
'
' Each command long holds the flash command byte in bits 31..24 and a zero 24-bit
' address field below it, ready to be sent MSB-first by spi_cmd32.
'
cmd_erase	long	$20_00_00_00		'command $20, used for the sector erase
cmd_program	long	$02_00_00_00		'command $02, used for page program (adr is OR'ed in before sending)
cmd_read	long	$03_00_00_00		'command $03, used to start the read-back
adr		long	0			'running flash byte address for programming, starts at 0
'
'
' Variables
'
cmd		res	1			'command/data shift register: sent from the top by spi_out, filled from the bottom by spi_in
x		res	1			'bit counter for spi_out
y		res	1			'byte counter for the read-back loop
'
'
' Program Data
'
' Image written to the flash, $400 bytes in total:
'
'   bytes $000..$013   blinker program (20 bytes), assembled from:
'
'	not	dirb
'.lp	not	outb
'	waitx	##20_000_000/4
'	jmp	#.lp
'
'   bytes $014..$3DF   zero fill (972 bytes)
'   bytes $3E0..$3FF   signature (32 bytes, key=0)
'
	orgh

pgmdata	byte	$FB,$F7,$23,$F6			'blinker program
	byte	$FD,$FB,$23,$F6
	byte	$25,$26,$80,$FF
	byte	$28,$80,$66,$FD
	byte	$F0,$FF,$9F,$FD

	byte	$00[972]			'zero fill up to the signature

	byte	$99,$AA,$44,$98,$86,$E2,$C8,$71	'SHA-256/HMAC signature
	byte	$C3,$1E,$60,$BF,$A3,$36,$19,$7A
	byte	$F5,$3D,$53,$97,$5C,$AF,$BA,$BB
	byte	$B7,$7F,$C3,$0A,$B4,$24,$02,$40

Publison · 2016-10-03 19:03

.spin2 should now work.

jmg · 2016-10-03 19:57

cgracey wrote: »

It's in the Google doc, at the end. You just put a "-" between the command word and the first hex value.

You mean like this ?

" Prop_Txt - 0 0 0 0 "

Might pay to put an exact example in the DOCs ?

cgracey wrote: »

The booter waits 10ms, in any case, before responding serially.

~10ms from what ? From the end of a valid IP string, start bit, or any edge ?
What about any error messages - are they also 10ms from last-RX activity ?

Prop_Txt is a little unclear, but from other examples I infer it terminates in either
" ~" ie Space+Tilde or
"|" followed by 32*8/6 = 42.666 Base64 + CR
That means 43 base 64 chars, with last 2 bits don't care ?

With a 100ms upper limit, and a 10ms+ lower limit, plus add in reset-cap effects, and this all starts to narrow down....

How long from reset pin (rapid) rise, until the P2 can sense RX ?

What happens if a 20h char is partway through, when the P2 exits Reset ?
With two low regions, which one is used for AutoBaud ?

If the P2 sends its verbose CR+LF+”FAIL”+CR+LF in One-Pin mode, that's going to scramble the Rx and cause more fails... ?

"If an external pull-up resistor is sensed on P60 (SPI_CK):"

How is that external pull-up resistor sensed - does it drive P60 low, briefly, then read after release to see Floating ?

Typical One-Pin use would be to have the MCU pulse the P2 reset, but then it needs to wait for any RST cap to RAMP, so is unsure when to start.
If it starts wrong, the load will fail, and speed is quite important here.

If P2 sent 0xFF, or 0xFE, (at the uncal 115200?) on RXD(P63) when it was ready, the MCU could sense that, and start with best precision. It no longer has to hope.

What could the P2 Auto-baud up to ? Most small MCUs these days have CalOsc and can go well above 115200.
eg 345600 ? (22.1184M/64)

captbill · 2016-10-03 20:42

Where can I find the Verilog source for this processor? I want to compile it for the Pipistrello/Pepino boards from Saanlima.

Publison · 2016-10-03 22:19

captbill wrote: »

Where can I find the Verilog source for this processor? I want to compile it for the Pipistrello/Pepino boards from Saanlima.

Post #1 shows the platforms that are supported.

Rayman · 2016-10-03 22:55

There is Verilog code for the P1 over in the P1 forum.
Probably be years before P2 code is released...
P2 chips aren't even available yet...

Peter Jakacki · 2016-10-03 23:34

Rayman wrote: »

There is Verilog code for the P1 over in the P1 forum.
Probably be years before P2 code is released...
P2 chips aren't even available yet...

I doubt that P2 FPGA source would ever be released as that would be commercial suicide. Parallax will have to sell lots of P2 silicon just to recoup investment alone. Perhaps when P3 is in silicon they might consider it but AFAIK chip companies have never ever done this before and Parallax are the only ones to have taken this bold step into this Brave New Open World.

ozpropdev · 2016-10-04 00:39

' - Connect SPI flash with a pull-up on spi_cs (and on spi_ck if you want faster booting)

  spi_cs = 61
  spi_ck = 60

FYI On the DE0-Nano bare builds these pins are output only.

cgracey · 2016-10-04 02:21

jmg wrote: »

cgracey wrote: »

It's in the Google doc, at the end. You just put a "-" between the command word and the first hex value.

You mean like this ?

" Prop_Txt - 0 0 0 0 "

Might pay to put an exact example in the DOCs ?

cgracey wrote: »

The booter waits 10ms, in any case, before responding serially.

~10ms from what ? From the end of a valid IP string, start bit, or any edge ?
What about any error messages - are they also 10ms from last-RX activity ?

Prop_Txt is a little unclear, but from other examples I infer it terminates in either
" ~" ie Space+Tilde or
"|" followed by 32*8/6 = 42.666 Base64 + CR
That means 43 base 64 chars, with last 2 bits don't care ?

With a 100ms upper limit, and a 10ms+ lower limit, plus add in reset-cap effects, and this all starts to narrow down....

How long from reset pin (rapid) rise, until the P2 can sense RX ?

What happens if a 20h char is partway through, when the P2 exits Reset ?
With two low regions, which one is used for AutoBaud ?

If the P2 sends its verbose CR+LF+”FAIL”+CR+LF in One-Pin mode, that's going to scramble the Rx and cause more fails... ?

"If an external pull-up resistor is sensed on P60 (SPI_CK):"

How is that external pull-up resistor sensed - does it drive P60 low, briefly, then read after release to see Floating ?

Typical One-Pin use would be to have the MCU pulse the P2 reset, but then it needs to wait for any RST cap to RAMP, so is unsure when to start.
If it starts wrong, the load will fail, and speed is quite important here.

If P2 sent 0xFF, or 0xFE, (at the uncal 115200?) on RXD(P63) when it was ready, the MCU could sense that, and start with best precision. It no longer has to hope.

What could the P2 Auto-baud up to ? Most small MCUs these days have CalOsc and can go well above 115200.
eg 345600 ? (22.1184M/64)

Thanks for noticing all these ambiguities. I went through and added explanations to cover most of them.

It's true that responding to an error in half-duplex could cause more errors. Maybe half-duplex is a bad idea. It really doesn't make sense when considering the inappropriate-character-error, which would only happen by accident, at any random time, and likely be clobbered by ongoing, incoming data. I think I will just get rid of half-duplex mode. I'll pull it out of the doc, for now.

At the 20MHz RC osc rate, above 115,200 baud works, but by the time you get to 230,400 baud, it becomes iffy. So, I just say it's limited to 115,200 baud.

jmg · 2016-10-04 02:49

cgracey wrote: »

It's true that responding to an error in half-duplex could cause more errors. Maybe half-duplex is a bad idea. It really doesn't make sense when considering the inappropriate-character-error, which would only happen by accident, at any random time, and likely be clobbered by ongoing, incoming data. I think I will just get rid of half-duplex mode. I'll pull it out of the doc, for now.

I think killing the One-Pin mode is a little drastic, some fine tuning is all that is needed.

A verbose async echo is likely a problem, but you could have a simple handshake ACK scheme ?
eg if the MCU sends 0xFF for example, the P2 could echo a single char as a Good/Bad, since last query ?

Some RF links are more half-duplex in nature, so this could be better than unsolicited replies.

That could work in both modes ?

cgracey wrote: »

At the 20MHz RC osc rate, above 115,200 baud works, but by the time you get to 230,400 baud, it becomes iffy. So, I just say it's limited to 115,200 baud.

230400 I make a 1.152% step size on a 20MHz clock, which is +/- 0.576% from an ideal value.

That should be ok, with a 1 SysCLK granular AutoBAUD, and even less than that, if using more than one bit time, or fractional baud schemes.

The main issue now, is how to sync the load, for best speed, and how to manage reboot on SW reset ?
I think P2 needs to send a ready Char ?

Addit: I coded some more, and I think the suggested ACK scheme above can also be used to signal READY.

MCU can send a 500us~1ms ENQ char, and listen for a ACK/Rdy - then it knows it can stream loader code from then.
I used 0xFE, as that is more tolerant of reset exit, and idles mostly hi, so is more baud-rate tolerant.

This removes possible variations in Reset CAP / Pullups, and drops the boot time to no more than 500us from Reset ready.

It also removes the risk of missing the ready char, by simply ensuring both MCU and P2 have exited reset before it proceeds.
I coded a loop in the Boot MCU for 500us timed ENQs, in 29 bytes. (One-Pin tolerant)

Adding a single char ENQ and a single char choice for OK/RDY or Error since last ENQ should be simple to do ?

This should also make PC links easier to manage, as they can do a similar 1-2ms polling cadence over USB-UARTS
It could maybe AutoBAUD on the ENQ char too ?

KeithE · 2016-10-05 14:59

Peter Jakacki wrote: »

I doubt that P2 FPGA source would ever be released as that would be commercial suicide. Parallax will have to sell lots of P2 silicon just to recoup investment alone. Perhaps when P3 is in silicon they might consider it but AFAIK chip companies have never ever done this before and Parallax are the only ones to have taken this bold step into this Brave New Open World.

One possibility would be to release it in an encrypted form. (`pragma protect begin ... `pragma protect end) It would take work to set this up, but it could enable others to target FPGA platforms. I think that it would have to be structured in a way that Altera-specific code was not encrypted so that it could be ported, and Parallax would have to have confidence that it couldn't be misused.

I imagine that Chip is so busy right now that this isn't going to happen. But perhaps if all the FPGA targeting is consuming a lot of time, he would consider making an encrypted release to a trusted (under NDA?) third party willing to take on that burden. Otherwise there probably isn't any immediate benefit to Parallax in such an exercise.

potatohead · 2016-10-05 16:13

Ideally, we see real chips next year.

Tubular · 2016-10-05 22:29

The other question is whether there is a way to encapsulate the P2 into an IP block so it could be adapted onto different platforms.

I'd pay for that right now, were it possible, and Parallax would retain the source. ie A block that's one step back from the RBF we currently use, but would allow us to do our own QSF pin connections, and/or stuff some glue logic in there.

edit: The other benefit is it might free up a bit of time for Chip, since the compiling and testing for all the different platforms could be looked after by others.

jmg · 2016-10-05 22:59

Tubular wrote: »

I'd pay for that right now, were it possible, and Parallax would retain the source. ie A block that's one step back from the RBF we currently use, but would allow us to do our own QSF pin connections, and/or stuff some glue logic in there.

What process and packages would you target ?

I know IC vendors often do a joint development, where a lead customer gets design-input, and sometimes an early start from first production allocation. That works well where the part has a usable general market, on top of the customer's use.

I saw this news go past recently...
http://www10.edacafe.com/nbc/articles/1/1457073/MagnaChip-Announces-Cost-Competitive-0.13-micron-Slim-Flash-Process-Technology

Sounds Microcontroller/P2 compatible.

Tubular · 2016-10-06 01:39

Well, any process/platform/package becomes available, within the Altera range. That flexibility is worth a fair bit in itself

My local Altera rep is on leave for a few more weeks, I'll find out whats involved. I'd like to understand more about the workflow, regardless of whether Parallax are interested or not

cgracey · 2016-10-06 03:49

Tubular wrote: »

Well, any process/platform/package becomes available, within the Altera range. That flexibility is worth a fair bit in itself

My local Altera rep is on leave for a few more weeks, I'll find out whats involved. I'd like to understand more about the workflow, regardless of whether Parallax are interested or not

If you wanted to pick an Altera FPGA and give me a list of Prop2 pins, I'll do a compile for you. I could also give you a few pointers on laying out an FPGA board.

Tubular · 2016-10-06 05:33

Thanks Chip, if I can pick your brain for half a hour on Skype or cell that'd be most helpful.

We're down to a few longs in the DE0-Nano and frankly all growth options from here have drawbacks. We've had some preliminary discussions with Terasic about something custom, but there would be a significant MOQ for that approach. Still, it's the best option from a minimal time/resource point of view, so far.

Rayman · 2016-10-10 14:05

Still seems like many of the instructions are still undocumented...

For example (looking at ozpropdev's code), "setbyts" apparently copies the low byte of the source long to all four bytes of the destination.

But, I can't find anywhere that documents this...
The link in the top post just says this:
SETBYTS D , S/# Set Bytes
Set ?

Same for movbyts, splitb, mergeb, splitw, mergew, etc...

cgracey · 2016-10-10 14:33

Rayman wrote: »

Still seems like many of the instructions are still undocumented...

For example (looking at ozpropdev's code), "setbyts" apparently copies the low byte of the source long to all four bytes of the destination.

But, I can't find anywhere that documents this...
The link in the top post just says this:
SETBYTS D , S/# Set Bytes
Set ?

Same for movbyts, splitb, mergeb, splitw, mergew, etc...

I will document the instructions next. That's a big hole, currently.

ozpropdev · 2016-10-11 11:50

cgracey wrote:

I will document the instructions next. That's a big hole, currently.

One omission in the instruction list is the second variant of the GETRND that returns a random bit in the nominated C/Z flag.

CCCC 1101011 CZ0 DDDDDDDDD 000011011        GETRND  D         {WC,WZ}
CCCC 1101011 CZ1 000000000 000011011        GETRND  WC,WZ     (Must be at least one effect used)

BTW Chip, In P2-Hot we had GETLFSR which was local to each cog and now we have GETRND in the current P2.
In another thread you indicated that this is now hub based. I assume it is now free running as opposed to the P2-Hot variant?

cgracey · 2016-10-15 10:22

I'm getting the next release ready.

Thanks, jmg, for all your pushing. I got the autobaud working with a lot of headroom at 460,800 baud, worst RC OSC case. It's probably good for ~700k baud, worst case. I did a lot of work with a two-stage autobaud scheme and had it running at 3M baud, but in the end, I thought it was much safer to always autobaud from scratch, as we don't know how much time delay there could be between bytes, which would allow for RC drift. What we have now is very solid. The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.

I changed the serial error reporting so that now "?" causes a "." (okay) or a "!" (error) character to be sent at any time within or after a command. The "." and "!" characters are now treated as white space, so there is no conflict with single-pin loading schemes, where the serial receiver is going to see the serial output, as well as incoming data. You just have to allow time for such interaction. The old way was kind of a mess. This is quite clean.

Also, RDPIN/WRPIN/WXPIN/WYPIN all automatically generate an AKPIN, so this simplifies code quite a bit. You almost never need a discrete AKPIN, but it's still there in case one of the others is not desirable.

The smart pins now return a C flag, so the time-states mode doesn't need to put the last-state bit into the MSB of the result, but conveys it via C. The USB conveys the error bit via C.

The serial receiver modes now leave the data MSB-justified, so it is up to you to do a 'SHR x,#24' after receiving a byte, for example. This cleaned up the hardware and simplifies the description.

Lastly, all those RDPIN/WRPIN conduits are 32 bits, taking only 2 clocks, so timing is much faster. This should help USB full-speed quite a bit.

cgracey · 2016-10-15 10:40

ozpropdev wrote: »
cgracey wrote:

I will document the instructions next. That's a big hole, currently.

One omission in the instruction list is the second variant of the GETRND that returns a random bit in the nominated C/Z flag.
CCCC 1101011 CZ0 DDDDDDDDD 000011011        GETRND  D         {WC,WZ}
CCCC 1101011 CZ1 000000000 000011011        GETRND  WC,WZ     (Must be at least one effect used)
BTW Chip, In P2-Hot we had GETLFSR which was local to each cog and now we have GETRND in the current P2.
In another thread you indicated that this is now hub based. I assume it is now free running as opposed to the P2-Hot variant?

That's correct. And each cog gets a different pattern of bits.

Rayman · 2016-10-15 13:11

I think it would also be nice if there were some instructions for using hubexec.

Seem to remember some caveats for usage...

I'm trying to remember why we need to use "LOC". The keyword LOC is mentioned in the docs in relation to PA/PB, but is not defined there.

Actually, I don't think "LOC" is defined anywhere in any of the docs...

jmg · 2016-10-15 18:45

cgracey wrote: »

I changed the serial error reporting so that now "?" causes a "." (okay) or a "!" (error) character to be sent at any time within or after a command. The "." and "!" characters are now treated as white space, so there is no conflict with single-pin loading schemes, where the serial receiver is going to see the serial output, as well as incoming data. You just have to allow time for such interaction. The old way was kind of a mess. This is quite clean.

Sounds great - does the AutoBaud character also echo a "." too ?
In my OnePin code, coverage of use cases requires that the Autobaud echo some char when done.
This allows the loading MCU to continually ping the AutoBaud char, and when it sees the expected echo, it can immediately start download.
Having these as NOPs is a good idea.

To support single-pin schemes, with this fastest-response handshake, I was planning on two Autobaud Chars.
One char selects one-Pin, & the other selects 2 pin, the decision is a simple part of the valid-limits test.

jmg · 2016-10-15 19:07

cgracey wrote: »

Thanks, jmg, for all your pushing. I got the autobaud working with a lot of headroom at 460,800 baud, worst RC OSC case. It's probably good for ~700k baud, worst case. I did a lot of work with a two-stage autobaud scheme and had it running at 3M baud, but in the end, I thought it was much safer to always autobaud from scratch, as we don't know how much time delay there could be between bytes, which would allow for RC drift. What we have now is very solid. The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.

I'm not following all of this.
What does "two-stage autobaud scheme" & "autobaud from scratch" mean, and how does what you have done instead, differ ?

Is Fractional Baud still in there ?
If so, how does the fractional bits map to the 10 available bit-add slots ?

If you want to allow & follow for RC drift, I think you mean Live Autobaud-Tracking.

I looked at that in another thread,
http://forums.parallax.com/discussion/comment/1389873/#Comment_1389873

and the ideal char for this, which I'll call AutoBaud-Tracking, is 0x55 "U", which is unique in having the most edges in a given time. (of course, remove "U" from 64b table)

I think the Smart Pins ability to Time X edges on B, started by A can be used here, viz:

The Pin Docs are not easy to follow, but what is needed is simple:
* Start measurement on Falling edge A (here the Start bit)
* Count 5(X=5) _/= on B, then Capture time from Start ie @ Stop bit, t9-baud capture
* Wait for read and re-arm on read

I think that is supported now ?

That hardware drops to a single* Smart-Pin read during RX & the AutoBaud-Track code becomes a very small and fast

INT_RX:
 nRise = CaptF_5thR()   // read and re-prime.
 IF nRise <= t9*1.1 THEN  // only possible on "U", all others > 1.2
   t9 = nRise  // Update and trim for temperature drift changes
   TxChar(AutoBaudEcho)  // optional ack the AutoBaud-Track char
 END

* Dual capture & dual read are still needed on the first AutoBaud char, as you need to reject the mid-char-reset-exit case.

jmg · 2016-10-15 19:21

cgracey wrote: »

... I did a lot of work with a two-stage autobaud scheme and had it running at 3M baud,

Sounds very good this far...

cgracey wrote: »

... The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.

If the SHA imposes some ~1MBd limit, why not use the 3MBd code, to work all the way up to that ~1M Baud ceiling ?

The less time spent on Char Rx, the more time you have for SHA code, so highest-speed AutoBaud code is still useful here.

eg Above, I have very compact and fast AutoBaud tracking code, which should be 3MBd capable and give many spare cycles at 1MBaud.

In my One-Pin code, I also changed the 64b mapping table, as your original has 5 decisions, which is both slower and larger than it needs to be.
That can pack down to 1 or 2 decisions, and still stay ASCII, and that also boosts spare cycles.

Prop2 FPGA files!!! - Updated 2 June 2018 - Final Version 32i

Comments