The forum software is not letting me upload .spin2 files, so I will post here the program that writes a signed blinker program into an attached SPI flash chip:
' Program SPI flash with HMAC-signed OUTB blinker program
' - Connect SPI flash with a pull-up on spi_cs (and on spi_ck if you want faster booting)
' - Blinks OUTB on boot-up
CON
' Pin numbers of the SPI flash signals used by the bit-bang routines in the DAT section.
' spi_cs is pulsed high then low at the start of every command (see spi_cmd).
' spi_ck is toggled high/low once per bit by spi_out and spi_in.
spi_cs = 61
spi_ck = 60
spi_dq = 59 'this is both DI and DO tied together, make sure WPn and HOLDn are tied high
DAT
org
'
'
' Init SPI pins
'
' The chip-select output is set high before its pin is made an output, so the
' pin starts out driving high (presumably to keep the flash deselected - the
' header asks for a pull-up on spi_cs as well). The clock pin is then made an
' output. The shared data pin is not touched here; spi_out switches it between
' output and input as needed.
'
outh #spi_cs
dirh #spi_cs
dirh #spi_ck
'
'
' Erase first $1000 bytes
'
' Sequence: write enable, then the 32-bit value in cmd_erase (command byte in
' the top 8 bits, zero in the low 24 bits) sent msb-first by spi_cmd32, then
' poll with spi_wait until the flash stops reporting busy.
' spi_cmd32 leaves chip select low; it is only raised again when the next
' command starts, which here is the status command issued inside spi_wait.
'
call #spi_wrena 'write enable
mov cmd,cmd_erase 'sector erase
call #spi_cmd32
call #spi_wait 'wait for completion
'
'
' Program first $400 bytes
'
' Outer loop (.program), one pass per page:
'   write enable, then send cmd_program with the current value of adr OR'd
'   into it as a 32-bit command.
' Inner loop (.byte), one pass per byte:
'   read the next byte of pgmdata through ptra (post-incremented), move it to
'   the top 8 bits of cmd because spi_out sends msb-first, and clock out 8 bits.
' adr counts the bytes sent. When its low 8 bits wrap to zero a 256-byte page
' has been sent, so the code waits for the flash to finish and then decides
' whether to start another page.
'
' NOTE(review): the exit test relies on how TESTB ... WZ sets Z in the
' instruction set this was written for. For the loop to stop once adr reaches
' $400 (bit 10 set), Z must be set while bit 10 is still clear - confirm
' against the instruction documentation for the toolchain/FPGA image in use.
'
loc ptra,#\pgmdata 'point to program data
.program call #spi_wrena 'write enable
mov cmd,cmd_program 'page program
or cmd,adr 'merge the current byte address into the command long
call #spi_cmd32
.byte rdbyte cmd,ptra++ 'get byte
mov x,#8 'send byte
shl cmd,#24 'msb-justify the byte for spi_out
call #spi_out
add adr,#1 'page done?
test adr,#$FF wz 'Z when the low 8 bits of adr have wrapped to zero
if_nz jmp #.byte
call #spi_wait 'wait for completion
testb adr,#10 wz 'another page?
if_z jmp #.program
'
'
' Read data back to outa for viewing on logic analyzer (or reset to reboot new program)
'
' Never returns. Each pass of .read1k:
'   - sends cmd_read (command byte plus a zero address) as a 32-bit command,
'   - pulses pin 8 high then low as a trigger for the analyzer,
'   - reads 1024 bytes (y = 1 << 10) with spi_in and copies each one into the
'     low byte of outa.
' dira = $1FF makes pins 0..8 outputs: 8 data pins plus the trigger pin.
'
mov dira,#$1FF
.read1k mov cmd,cmd_read 'start read
call #spi_cmd32
outh #8 'trigger signal
outl #8
decod y,#10 'read byte to outa
.read call #spi_in
setbyte outa,cmd,#0 'low byte of cmd (the byte just received) -> outa[7:0]
djnz y,#.read
jmp #.read1k 'loop
'
'
' SPI write enable
'
' Issues the one-byte write-enable command ($06) to the flash.
' Implemented as a tail jump: spi_cmd8 runs on into spi_cmd and spi_out, and
' the ret at the end of spi_out returns directly to whoever called spi_wrena.
' Modifies cmd and x (both are consumed by the send routine).
'
spi_wrena mov cmd,#$06 'command byte: write enable
jmp #spi_cmd8 'send it as an 8-bit command; callee's ret returns for us
'
'
' SPI wait while busy
'
' Sends the one-byte command $05 once (presumably the flash's read-status
' command - confirm against the flash datasheet), then keeps reading bytes
' with spi_in until bit 0 of the byte just read is clear.
' Starting this command is also what raises chip select after a preceding
' erase/program command, because spi_cmd pulses chip select high first.
' Modifies cmd, x and C.
' NOTE: there is no timeout - the loop runs for as long as bit 0 reads as set.
'
spi_wait mov cmd,#$05
call #spi_cmd8
.wait call #spi_in
test cmd,#$01 wc 'C = bit 0 of the byte just read (treated as the busy flag)
if_c jmp #.wait
ret
'
'
' SPI command
'
' Starts a new flash command and sends it. Three entry points:
'   spi_cmd32 - send all 32 bits of cmd
'   spi_cmd8  - send the low byte of cmd (it is first shifted up to bits 31..24)
'   spi_cmd   - common tail: x = number of bits, cmd = msb-justified data
' Chip select is driven high and then low, which begins a fresh command.
' There is no ret here: execution falls through into spi_out, which sends the
' bits and whose ret returns to the caller. Chip select is left low on return.
'
spi_cmd32 mov x,#32
jmp #spi_cmd
spi_cmd8 mov x,#8
shl cmd,#24 'move the command byte to the top so it is sent first
spi_cmd outh #spi_cs
outl #spi_cs
'
'
' SPI long/byte out (x=bits, cmd=msbdata)
'
' Shifts x bits out of cmd, most significant bit first, one clock pulse per bit.
' In:  x   = number of bits to send (the callers in this file use 8 or 32)
'      cmd = data, left-justified so the first bit to send is bit 31
' Out: x = 0, cmd shifted left by the number of bits sent, C modified.
'      The data pin is left as an input, ready for spi_in.
' Also entered by fall-through from spi_cmd.
' On the final bit the data pin is switched to input while the clock is still
' high, i.e. before the last falling clock edge (presumably so the shared
' DI/DO line is released before the flash starts driving it).
'
spi_out dirh #spi_dq 'make data output
.out shl cmd,#1 wc 'get bit to send
outc #spi_dq 'set data to bit
outh #spi_ck 'clock high
cmp x,#2 wc 'last bit?
if_c dirl #spi_dq 'if last bit, make data input
outl #spi_ck 'clock low
djnz x,#.out 'loop to output bits
ret
'
'
' SPI byte in (cmd)
'
' Clocks in 8 bits from the data pin. The four instructions up to the .in
' label are repeated 8 times by rep; each pass pulses the clock and rotates
' the sampled bit into bit 0 of cmd, so the first bit received ends up as
' bit 7 of the low byte.
' Out: cmd[7:0] = byte received; the previous contents of cmd are shifted up
'      by 8 bits rather than cleared. C = last bit sampled.
' Expects the data pin to already be an input (spi_out leaves it that way).
' The instruction order matters: per the original note on testin, the sample
' reflects the pin state from before the preceding outl took effect.
'
spi_in rep @.in,#8 'ready to input a byte
outh #spi_ck 'clock high
outl #spi_ck 'clock low
testin #spi_dq wc 'sample data bit ('testin' is from before 'outl')
rcl cmd,#1 'save data bit
.in
ret
'
'
' Data
'
' Each command long is sent msb-first by spi_cmd32: the top byte is the flash
' command and the low 24 bits are the address (zero here; the programming
' loop ORs adr into cmd_program before sending).
' adr is the running count of bytes programmed and starts at 0.
'
cmd_erase long $20_00_00_00
cmd_program long $02_00_00_00
cmd_read long $03_00_00_00
adr long 0
'
'
' Variables
'
' Uninitialised working registers (one long each):
'   cmd - shift register: data to send in spi_out, data received in spi_in
'   x   - bit counter for spi_out
'   y   - byte counter for the read-back loop
'
cmd res 1
x res 1
y res 1
'
'
' Program Data
'
' 1024 ($400) bytes in total, the amount written by the programming loop:
'   20 bytes of program, 972 bytes of zero padding, 32 bytes of signature.
'
' first 20 bytes are blinker program:
'
' not dirb
'.lp not outb
' waitx ##20_000_000/4
' jmp #.lp
'
' last 32 bytes are signature (key=0)
'
' NOTE(review): the signature presumably has to be regenerated if any of the
' preceding bytes change - confirm against the boot documentation.
'
orgh
pgmdata byte $FB,$F7,$23,$F6,$FD,$FB,$23,$F6,$25,$26,$80,$FF,$28,$80,$66,$FD 'blinker program
byte $F0,$FF,$9F,$FD,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
byte $99,$AA,$44,$98,$86,$E2,$C8,$71,$C3,$1E,$60,$BF,$A3,$36,$19,$7A 'SHA-256/HMAC signature
byte $F5,$3D,$53,$97,$5C,$AF,$BA,$BB,$B7,$7F,$C3,$0A,$B4,$24,$02,$40
The booter waits 10ms, in any case, before responding serially.
~10ms from what ? From the end of a valid IP string, start bit, or any edge ?
What about any error messages - are they also 10ms from last-RX activity ?
Prop_Txt is a little unclear, but from other examples I infer it terminates in either
" ~" ie Space+Tilde or
"|" followed by 32*8/6 = 42.666 Base64 + CR
That means 43 Base64 chars, with the last 2 bits being don't-care?
With a 100ms upper limit, and a 10ms+ lower limit, plus add in reset-cap effects, and this all starts to narrow down....
How long from reset pin (rapid) rise, until the P2 can sense RX ?
What happens if a 20h char is partway through, when the P2 exits Reset ?
With two low regions, which one is used for AutoBaud ?
If the P2 sends its verbose CR+LF+"FAIL"+CR+LF in One-Pin mode, that's going to scramble the Rx and cause more fails...?
"If an external pull-up resistor is sensed on P60 (SPI_CK):"
How is that external pull-up resistor sensed - does it drive P60 low, briefly, then read after release to see Floating ?
Typical One-Pin use would be to have the MCU pulse the P2 reset, but then it needs to wait for any RST cap to RAMP, so is unsure when to start.
If it starts wrong, the load will fail, and speed is quite important here.
If P2 sent 0xFF, or 0xFE, (at the uncal 115200?) on RXD(P63) when it was ready, the MCU could sense that, and start with best precision. It no longer has to hope.
What could the P2 Auto-baud up to ? Most small MCUs these days have CalOsc and can go well above 115200.
eg 345600 ? (22.1184M/64)
There is Verilog code for the P1 over in the P1 forum.
Probably be years before P2 code is released...
P2 chips aren't even available yet...
I doubt that P2 FPGA source would ever be released as that would be commercial suicide. Parallax will have to sell lots of P2 silicon just to recoup investment alone. Perhaps when P3 is in silicon they might consider it but AFAIK chip companies have never ever done this before and Parallax are the only ones to have taken this bold step into this Brave New Open World.
The booter waits 10ms, in any case, before responding serially.
~10ms from what ? From the end of a valid IP string, start bit, or any edge ?
What about any error messages - are they also 10ms from last-RX activity ?
Prop_Txt is a little unclear, but from other examples I infer it terminates in either
" ~" ie Space+Tilde or
"|" followed by 32*8/6 = 42.666 Base64 + CR
That means 43 Base64 chars, with the last 2 bits being don't-care?
With a 100ms upper limit, and a 10ms+ lower limit, plus add in reset-cap effects, and this all starts to narrow down....
How long from reset pin (rapid) rise, until the P2 can sense RX ?
What happens if a 20h char is partway through, when the P2 exits Reset ?
With two low regions, which one is used for AutoBaud ?
If the P2 sends its verbose CR+LF+"FAIL"+CR+LF in One-Pin mode, that's going to scramble the Rx and cause more fails...?
"If an external pull-up resistor is sensed on P60 (SPI_CK):"
How is that external pull-up resistor sensed - does it drive P60 low, briefly, then read after release to see Floating ?
Typical One-Pin use would be to have the MCU pulse the P2 reset, but then it needs to wait for any RST cap to RAMP, so is unsure when to start.
If it starts wrong, the load will fail, and speed is quite important here.
If P2 sent 0xFF, or 0xFE, (at the uncal 115200?) on RXD(P63) when it was ready, the MCU could sense that, and start with best precision. It no longer has to hope.
What could the P2 Auto-baud up to ? Most small MCUs these days have CalOsc and can go well above 115200.
eg 345600 ? (22.1184M/64)
Thanks for noticing all these ambiguities. I went through and added explanations to cover most of them.
It's true that responding to an error in half-duplex could cause more errors. Maybe half-duplex is a bad idea. It really doesn't make sense when considering the inappropriate-character-error, which would only happen by accident, at any random time, and likely be clobbered by ongoing, incoming data. I think I will just get rid of half-duplex mode. I'll pull it out of the doc, for now.
At the 20MHz RC osc rate, above 115,200 baud works, but by the time you get to 230,400 baud, it becomes iffy. So, I just say it's limited to 115,200 baud.
It's true that responding to an error in half-duplex could cause more errors. Maybe half-duplex is a bad idea. It really doesn't make sense when considering the inappropriate-character-error, which would only happen by accident, at any random time, and likely be clobbered by ongoing, incoming data. I think I will just get rid of half-duplex mode. I'll pull it out of the doc, for now.
I think killing the One-Pin mode is a little drastic, some fine tuning is all that is needed.
A verbose async echo is likely a problem, but you could have a simple handshake ACK scheme ?
eg if the MCU sends 0xFF for example, the P2 could echo a single char as a Good/Bad, since last query ?
Some RF links are more half-duplex in nature, so this could be better than unsolicited replies.
At the 20MHz RC osc rate, above 115,200 baud works, but by the time you get to 230,400 baud, it becomes iffy. So, I just say it's limited to 115,200 baud.
230400 I make a 1.152% step size on a 20MHz clock, which is +/- 0.576% from an ideal value.
That should be ok, with a 1 SysCLK granular AutoBAUD, and even less than that, if using more than one bit time, or fractional baud schemes.
The main issue now, is how to sync the load, for best speed, and how to manage reboot on SW reset ?
I think P2 needs to send a ready Char ?
Addit: I coded some more, and I think the suggested ACK scheme above can also be used to signal READY.
MCU can send a 500us~1ms ENQ char, and listen for an ACK/Rdy - then it knows it can stream loader code from then on.
I used 0xFE, as that is more tolerant of reset exit, and idles mostly hi, so is more baud-rate tolerant.
This removes possible variations in Reset CAP / Pullups, and drops the boot time to no more than 500us from Reset ready.
It also removes the risk of missing the ready char, by simply ensuring both MCU and P2 have exited reset before it proceeds.
I coded a loop in the Boot MCU for 500us timed ENQs, in 29 bytes. (One-Pin tolerant)
Adding a single char ENQ and a single char choice for OK/RDY or Error since last ENQ should be simple to do ?
This should also make PC links easier to manage, as they can do a similar 1-2ms polling cadence over USB-UARTS
It could maybe AutoBAUD on the ENQ char too ?
I doubt that P2 FPGA source would ever be released as that would be commercial suicide. Parallax will have to sell lots of P2 silicon just to recoup investment alone. Perhaps when P3 is in silicon they might consider it but AFAIK chip companies have never ever done this before and Parallax are the only ones to have taken this bold step into this Brave New Open World.
One possibility would be to release it in an encrypted form. (`pragma protect begin ... `pragma protect end) It would take work to set this up, but it could enable others to target FPGA platforms. I think that it would have to be structured in a way that Altera-specific code was not encrypted so that it could be ported, and Parallax would have to have confidence that it couldn't be misused.
I imagine that Chip is so busy right now that this isn't going to happen. But perhaps if all the FPGA targeting is consuming a lot of time, he would consider making an encrypted release to a trusted (under NDA?) third party willing to take on that burden. Otherwise there probably isn't any immediate benefit to Parallax in such an exercise.
The other question is whether there is a way to encapsulate the P2 into an IP block so it could be adapted onto different platforms.
I'd pay for that right now, were it possible, and Parallax would retain the source. ie A block that's one step back from the RBF we currently use, but would allow us to do our own QSF pin connections, and/or stuff some glue logic in there.
edit: The other benefit is it might free up a bit of time for Chip, since the compiling and testing for all the different platforms could be looked after by others.
I'd pay for that right now, were it possible, and Parallax would retain the source. ie A block that's one step back from the RBF we currently use, but would allow us to do our own QSF pin connections, and/or stuff some glue logic in there.
What process and packages would you target ?
I know IC vendors often do a joint development, where a lead customer gets design-input, and sometimes an early start from first production allocation. That works well where the part has a usable general market, on top of the customer's use.
Well, any process/platform/package becomes available, within the Altera range. That flexibility is worth a fair bit in itself
My local Altera rep is on leave for a few more weeks, I'll find out what's involved. I'd like to understand more about the workflow, regardless of whether Parallax are interested or not.
Well, any process/platform/package becomes available, within the Altera range. That flexibility is worth a fair bit in itself
My local Altera rep is on leave for a few more weeks, I'll find out what's involved. I'd like to understand more about the workflow, regardless of whether Parallax are interested or not.
If you wanted to pick an Altera FPGA and give me a list of Prop2 pins, I'll do a compile for you. I could also give you a few pointers on laying out an FPGA board.
Thanks Chip, if I can pick your brain for half a hour on Skype or cell that'd be most helpful.
We're down to a few longs in the DE0-Nano and frankly all growth options from here have drawbacks. We've had some preliminary discussions with Terasic about something custom, but there would be a significant MOQ for that approach. Still, it's the best option from a minimal time/resource point of view, so far.
I will document the instructions next. That's a big hole, currently.
One omission in the instruction list is the second variant of the GETRND that returns a random bit in the nominated C/Z flag.
CCCC 1101011 CZ0 DDDDDDDDD 000011011 GETRND D {WC,WZ}
CCCC 1101011 CZ1 000000000 000011011 GETRND WC,WZ (Must be at least one effect used)
BTW Chip, In P2-Hot we had GETLFSR which was local to each cog and now we have GETRND in the current P2.
In another thread you indicated that this is now hub based. I assume it is now free running as opposed to the P2-Hot variant?
Thanks, jmg, for all your pushing. I got the autobaud working with a lot of headroom at 460,800 baud, worst RC OSC case. It's probably good for ~700k baud, worst case. I did a lot of work with a two-stage autobaud scheme and had it running at 3M baud, but in the end, I thought it was much safer to always autobaud from scratch, as we don't know how much time delay there could be between bytes, which would allow for RC drift. What we have now is very solid. The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.
I changed the serial error reporting so that now "?" causes a "." (okay) or a "!" (error) character to be sent at any time within or after a command. The "." and "!" characters are now treated as white space, so there is no conflict with single-pin loading schemes, where the serial receiver is going to see the serial output, as well as incoming data. You just have to allow time for such interaction. The old way was kind of a mess. This is quite clean.
Also, RDPIN/WRPIN/WXPIN/WYPIN all automatically generate an AKPIN, so this simplifies code quite a bit. You almost never need a discrete AKPIN, but it's still there in case one of the others is not desirable.
The smart pins now return a C flag, so the time-states mode doesn't need to put the last-state bit into the MSB of the result, but conveys it via C. The USB conveys the error bit via C.
The serial receiver modes now leave the data MSB-justified, so it is up to you to do a 'SHR x,#24' after receiving a byte, for example. This cleaned up the hardware and simplifies the description.
Lastly, all those RDPIN/WRPIN conduits are 32 bits, taking only 2 clocks, so timing is much faster. This should help USB full-speed quite a bit.
I will document the instructions next. That's a big hole, currently.
One omission in the instruction list is the second variant of the GETRND that returns a random bit in the nominated C/Z flag.
CCCC 1101011 CZ0 DDDDDDDDD 000011011 GETRND D {WC,WZ}
CCCC 1101011 CZ1 000000000 000011011 GETRND WC,WZ (Must be at least one effect used)
BTW Chip, In P2-Hot we had GETLFSR which was local to each cog and now we have GETRND in the current P2.
In another thread you indicated that this is now hub based. I assume it is now free running as opposed to the P2-Hot variant?
That's correct. And each cog gets a different pattern of bits.
I changed the serial error reporting so that now "?" causes a "." (okay) or a "!" (error) character to be sent at any time within or after a command. The "." and "!" characters are now treated as white space, so there is no conflict with single-pin loading schemes, where the serial receiver is going to see the serial output, as well as incoming data. You just have to allow time for such interaction. The old way was kind of a mess. This is quite clean.
Sounds great - does the AutoBaud character also echo a "." too ? In my OnePin code, coverage of use cases requires that the Autobaud echo some char when done.
This allows the loading MCU to continually ping the AutoBaud char, and when it sees the expected echo, it can immediately start download.
Having these as NOPs is a good idea.
To support single-pin schemes, with this fastest-response handshake, I was planning on two Autobaud Chars.
One char selects one-Pin, & the other selects 2 pin, the decision is a simple part of the valid-limits test.
Thanks, jmg, for all your pushing. I got the autobaud working with a lot of headroom at 460,800 baud, worst RC OSC case. It's probably good for ~700k baud, worst case. I did a lot of work with a two-stage autobaud scheme and had it running at 3M baud, but in the end, I thought it was much safer to always autobaud from scratch, as we don't know how much time delay there could be between bytes, which would allow for RC drift. What we have now is very solid. The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.
I'm not following all of this.
What does "two-stage autobaud scheme" & "autobaud from scratch" mean, and how does what you have done instead, differ ?
Is Fractional Baud still in there ?
If so, how does the fractional bits map to the 10 available bit-add slots ?
If you want to allow & follow for RC drift, I think you mean Live Autobaud-Tracking.
and the ideal char for this, which I'll call AutoBaud-Tracking, is 0x55 "U", which is unique in having the most edges in a given time. (of course, remove "U" from 64b table)
I think the Smart Pins ability to Time X edges on B, started by A can be used here, viz:
The Pin Docs are not easy to follow, but what is needed is simple:
* Start measurement on Falling edge A (here the Start bit)
* Count 5(X=5) _/= on B, then Capture time from Start ie @ Stop bit, t9-baud capture
* Wait for read and re-arm on read
I think that is supported now ?
That hardware drops to a single* Smart-Pin read during RX & the AutoBaud-Track code becomes a very small and fast
INT_RX:
nRise = CaptF_5thR() // read and re-prime.
IF nRise <= t9*1.1 THEN // only possible on "U", all others > 1.2
t9 = nRise // Update and trim for temperature drift changes
TxChar(AutoBaudEcho) // optional ack the AutoBaud-Track char
END
* Dual capture & dual read are still needed on the first AutoBaud char, as you need to reject the mid-char-reset-exit case.
... The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.
If the SHA imposes some ~1MBd limit, why not use the 3MBd code, to work all the way up to that ~1M Baud ceiling ?
The less time spent on Char Rx, the more time you have for SHA code, so highest-speed AutoBaud code is still useful here.
eg Above, I have very compact and fast AutoBaud tracking code, which should be 3MBd capable and give many spare cycles at 1MBaud.
In my One-Pin code, I also changed the 64b mapping table, as your original has 5 decisions, which is both slower and larger than it needs to be.
That can pack down to 1 or 2 decisions, and still stay ASCII, and that also boosts spare cycles.
Comments
All flavours of V12 loaded Ok on their respective platforms.
No, just 3-pin.
Thanks for checking all those.
" Prop_Txt - 0 0 0 0 "
Might pay to put an exact example in the DOCs ?
~10ms from what ? From the end of a valid IP string, start bit, or any edge ?
What about any error messages - are they also 10ms from last-RX activity ?
Prop_Txt is a little unclear, but from other examples I infer it terminates in either
" ~" ie Space+Tilde or
"|" followed by 32*8/6 = 42.666 Base64 + CR
That means 43 Base64 chars, with the last 2 bits being don't-care?
With a 100ms upper limit, and a 10ms+ lower limit, plus add in reset-cap effects, and this all starts to narrow down....
How long from reset pin (rapid) rise, until the P2 can sense RX ?
What happens if a 20h char is partway through, when the P2 exits Reset ?
With two low regions, which one is used for AutoBaud ?
If the P2 sends its verbose CR+LF+"FAIL"+CR+LF in One-Pin mode, that's going to scramble the Rx and cause more fails...?
"If an external pull-up resistor is sensed on P60 (SPI_CK):"
How is that external pull-up resistor sensed - does it drive P60 low, briefly, then read after release to see Floating ?
Typical One-Pin use would be to have the MCU pulse the P2 reset, but then it needs to wait for any RST cap to RAMP, so is unsure when to start.
If it starts wrong, the load will fail, and speed is quite important here.
If P2 sent 0xFF, or 0xFE, (at the uncal 115200?) on RXD(P63) when it was ready, the MCU could sense that, and start with best precision. It no longer has to hope.
What could the P2 Auto-baud up to ? Most small MCUs these days have CalOsc and can go well above 115200.
eg 345600 ? (22.1184M/64)
Post #1 shows the platforms that are supported.
Probably be years before P2 code is released...
P2 chips aren't even available yet...
I doubt that P2 FPGA source would ever be released as that would be commercial suicide. Parallax will have to sell lots of P2 silicon just to recoup investment alone. Perhaps when P3 is in silicon they might consider it but AFAIK chip companies have never ever done this before and Parallax are the only ones to have taken this bold step into this Brave New Open World.
Thanks for noticing all these ambiguities. I went through and added explanations to cover most of them.
It's true that responding to an error in half-duplex could cause more errors. Maybe half-duplex is a bad idea. It really doesn't make sense when considering the inappropriate-character-error, which would only happen by accident, at any random time, and likely be clobbered by ongoing, incoming data. I think I will just get rid of half-duplex mode. I'll pull it out of the doc, for now.
At the 20MHz RC osc rate, above 115,200 baud works, but by the time you get to 230,400 baud, it becomes iffy. So, I just say it's limited to 115,200 baud.
I think killing the One-Pin mode is a little drastic, some fine tuning is all that is needed.
A verbose async echo is likely a problem, but you could have a simple handshake ACK scheme ?
eg if the MCU sends 0xFF for example, the P2 could echo a single char as a Good/Bad, since last query ?
Some RF links are more half-duplex in nature, so this could be better than unsolicited replies.
That could work in both modes ?
230400 I make a 1.152% step size on a 20MHz clock, which is +/- 0.576% from an ideal value.
That should be ok, with a 1 SysCLK granular AutoBAUD, and even less than that, if using more than one bit time, or fractional baud schemes.
The main issue now, is how to sync the load, for best speed, and how to manage reboot on SW reset ?
I think P2 needs to send a ready Char ?
Addit: I coded some more, and I think the suggested ACK scheme above can also be used to signal READY.
MCU can send a 500us~1ms ENQ char, and listen for an ACK/Rdy - then it knows it can stream loader code from then on.
I used 0xFE, as that is more tolerant of reset exit, and idles mostly hi, so is more baud-rate tolerant.
This removes possible variations in Reset CAP / Pullups, and drops the boot time to no more than 500us from Reset ready.
It also removes the risk of missing the ready char, by simply ensuring both MCU and P2 have exited reset before it proceeds.
I coded a loop in the Boot MCU for 500us timed ENQs, in 29 bytes. (One-Pin tolerant)
Adding a single char ENQ and a single char choice for OK/RDY or Error since last ENQ should be simple to do ?
This should also make PC links easier to manage, as they can do a similar 1-2ms polling cadence over USB-UARTS
It could maybe AutoBAUD on the ENQ char too ?
One possibility would be to release it in an encrypted form. (`pragma protect begin ... `pragma protect end) It would take work to set this up, but it could enable others to target FPGA platforms. I think that it would have to be structured in a way that Altera-specific code was not encrypted so that it could be ported, and Parallax would have to have confidence that it couldn't be misused.
I imagine that Chip is so busy right now that this isn't going to happen. But perhaps if all the FPGA targeting is consuming a lot of time, he would consider making an encrypted release to a trusted (under NDA?) third party willing to take on that burden. Otherwise there probably isn't any immediate benefit to Parallax in such an exercise.
I'd pay for that right now, were it possible, and Parallax would retain the source. ie A block that's one step back from the RBF we currently use, but would allow us to do our own QSF pin connections, and/or stuff some glue logic in there.
edit: The other benefit is it might free up a bit of time for Chip, since the compiling and testing for all the different platforms could be looked after by others.
What process and packages would you target ?
I know IC vendors often do a joint development, where a lead customer gets design-input, and sometimes an early start from first production allocation. That works well where the part has a usable general market, on top of the customer's use.
I saw this news go past recently...
http://www10.edacafe.com/nbc/articles/1/1457073/MagnaChip-Announces-Cost-Competitive-0.13-micron-Slim-Flash-Process-Technology
Sounds Microcontroller/P2 compatible.
My local Altera rep is on leave for a few more weeks, I'll find out what's involved. I'd like to understand more about the workflow, regardless of whether Parallax are interested or not.
If you wanted to pick an Altera FPGA and give me a list of Prop2 pins, I'll do a compile for you. I could also give you a few pointers on laying out an FPGA board.
We're down to a few longs in the DE0-Nano and frankly all growth options from here have drawbacks. We've had some preliminary discussions with Terasic about something custom, but there would be a significant MOQ for that approach. Still, it's the best option from a minimal time/resource point of view, so far.
For example (looking at ozpropdev's code), "setbyts" apparently copies the low byte of the source long to all four bytes of the destination.
But, I can't find anywhere that documents this...
The link in the top post just says this:
SETBYTS D , S/# Set Bytes
Set ?
Same for movbyts, splitb, mergeb, splitw, mergew, etc...
I will document the instructions next. That's a big hole, currently.
BTW Chip, In P2-Hot we had GETLFSR which was local to each cog and now we have GETRND in the current P2.
In another thread you indicated that this is now hub based. I assume it is now free running as opposed to the P2-Hot variant?
Thanks, jmg, for all your pushing. I got the autobaud working with a lot of headroom at 460,800 baud, worst RC OSC case. It's probably good for ~700k baud, worst case. I did a lot of work with a two-stage autobaud scheme and had it running at 3M baud, but in the end, I thought it was much safer to always autobaud from scratch, as we don't know how much time delay there could be between bytes, which would allow for RC drift. What we have now is very solid. The SHA-256/HMAC now works as the data comes in, so that puts a 100k byte/second speed limit on things, making the theoretical ceiling only 1M baud, anyway.
I changed the serial error reporting so that now "?" causes a "." (okay) or a "!" (error) character to be sent at any time within or after a command. The "." and "!" characters are now treated as white space, so there is no conflict with single-pin loading schemes, where the serial receiver is going to see the serial output, as well as incoming data. You just have to allow time for such interaction. The old way was kind of a mess. This is quite clean.
Also, RDPIN/WRPIN/WXPIN/WYPIN all automatically generate an AKPIN, so this simplifies code quite a bit. You almost never need a discrete AKPIN, but it's still there in case one of the others is not desirable.
The smart pins now return a C flag, so the time-states mode doesn't need to put the last-state bit into the MSB of the result, but conveys it via C. The USB conveys the error bit via C.
The serial receiver modes now leave the data MSB-justified, so it is up to you to do a 'SHR x,#24' after receiving a byte, for example. This cleaned up the hardware and simplifies the description.
Lastly, all those RDPIN/WRPIN conduits are 32 bits, taking only 2 clocks, so timing is much faster. This should help USB full-speed quite a bit.
That's correct. And each cog gets a different pattern of bits.
Seem to remember some caveats for usage...
I'm trying to remember why we need to use "LOC". The keyword LOC is mentioned in the docs in relation to PA/PB, but is not defined there.
Actually, I don't think "LOC" is defined anywhere in any of the docs...
Sounds great - does the AutoBaud character also echo a "." too ?
In my OnePin code, coverage of use cases requires that the Autobaud echo some char when done.
This allows the loading MCU to continually ping the AutoBaud char, and when it sees the expected echo, it can immediately start download.
Having these as NOPs is a good idea.
To support single-pin schemes, with this fastest-response handshake, I was planning on two Autobaud Chars.
One char selects one-Pin, & the other selects 2 pin, the decision is a simple part of the valid-limits test.
What does "two-stage autobaud scheme" & "autobaud from scratch" mean, and how does what you have done instead, differ ?
Is Fractional Baud still in there ?
If so, how does the fractional bits map to the 10 available bit-add slots ?
If you want to allow & follow for RC drift, I think you mean Live Autobaud-Tracking.
I looked at that in another thread,
http://forums.parallax.com/discussion/comment/1389873/#Comment_1389873
and the ideal char for this, which I'll call AutoBaud-Tracking, is 0x55 "U", which is unique in having the most edges in a given time. (of course, remove "U" from 64b table)
I think the Smart Pins ability to Time X edges on B, started by A can be used here, viz:
The Pin Docs are not easy to follow, but what is needed is simple:
* Start measurement on Falling edge A (here the Start bit)
* Count 5(X=5) _/= on B, then Capture time from Start ie @ Stop bit, t9-baud capture
* Wait for read and re-arm on read
I think that is supported now ?
That hardware drops to a single* Smart-Pin read during RX & the AutoBaud-Track code becomes a very small and fast
* Dual capture & dual read are still needed on the first AutoBaud char, as you need to reject the mid-char-reset-exit case.
Sounds very good this far...
If the SHA imposes some ~1MBd limit, why not use the 3MBd code, to work all the way up to that ~1M Baud ceiling ?
The less time spent on Char Rx, the more time you have for SHA code, so highest-speed AutoBaud code is still useful here.
eg Above, I have very compact and fast AutoBaud tracking code, which should be 3MBd capable and give many spare cycles at 1MBaud.
In my One-Pin code, I also changed the 64b mapping table, as your original has 5 decisions, which is both slower and larger than it needs to be.
That can pack down to 1 or 2 decisions, and still stay ASCII, and that also boosts spare cycles.