' Copyright Brad Campbell, 2007 - All rights reserved
'
' Low-speed USB device stack for the Parallax Propeller, bit-banged by one cog.
'
' 3 IO pins used. Enable, D+, D-
' Current configuration.
' 1 - D-
' 2 - D+
' 3 - Enable
' Tie 3 to 1 with a 3.3k resistor and put a pair of 68Ohm resistors in series with D+ and D- to the host
'
' The pin masks in the DAT section are J = $1, K = $2, bmask = $3 and enable = $4,
' i.e. I/O bits 0, 1 and 2 (the list above counts from 1).

' Start - launch the USB cog.
'   crctable : hub address stored in Pcrcmask. crc16check reads it with rdword at
'              crctable + 2 * index, where index is an 8-bit value, so it is used as
'              a table of words.
'   buffers  : hub address stored in epb. The cog accesses longs at
'              epb + epoint*16 (written by data_1) and epb + 64 + epoint*16
'              (read by pid_in, cleared by pid_ack).
'   Returns OK, the result of cognew.
'   The local i is declared but never used.
PUB Start(crctable, buffers) : OK | i
  Pcrcmask := crctable
  epb := buffers
  OK := cognew(@USB_STACK, 0)

DAT
' Cog entry point. Clears a few state registers, writes $FFFFFFFF to the long at
' epb and spins until that long reads back as zero, then drives the enable pin
' high as an output and drops into the wait-for-start loop.
USB_STACK               org
                        mov     aring, #0
                        mov     packetcnt, #0
                        mov     packetfail, #0
                        mov     address, #0
                        mov     bmask, K
                        wrlong  allones, epb            ' Semaphore to wait for app thread to be running
                        or      bmask, J
:sloop                  rdlong  da, epb
                        test    da, allones wz
              IF_NZ     jmp     #:sloop                 ' Loop until it is
                        or      outa, enable
                        or      dira, enable            ' Tell the host we are alive!
                        jmp     #wfs

' --------------------------------------------
' Receive low level routines
' --------------------------------------------

' ok, this receive loop is about right. +/- 1 nop anywhere and it breaks. Same with #wfs
'
' The per-bit sampling code is unrolled three times (loop, loop1, loop2). The first
' two copies are 13 instructions long; the third is 14 because of the jmp back to
' loop. Do not add or remove instructions here. The numbers in the comments are the
' original author's position counts within each copy.
'
' l1 is entered when bitcnt reaches zero: it stores the completed long through the
' self-modified destination of l1ind (set by movd l1ind, bufptr) and reloads bitcnt.
l1                      add     bufptr, #1              ' 12 post inc pointer to allow next mov to pipeline
l1ind                   mov     0-0, acc                ' 13 store contents of buffer
                        mov     bitcnt, #32             ' 14 reload bit counter
loop                    test    K, ina wz               ' 1 - 13 bits
                        muxz    eor, emask wc           ' 2 - top 2 bits contain previous and new values..
                        shl     eor, #1                 ' 3 - parity check 00 or 11 is odd and 01 or 10 is even
                        test    bmask, ina wz           ' 4 - Check for EOP
              IF_Z      jmp     #wfe                    ' 5
                        rcr     acc, #1                 ' 6 - rotate carry from xor into top bit of acc
                        rcl     stuff, #1               ' 7 - Bottom 7 bits are all 0 we need to unstuff
                        and     stuff, #$3F wz          ' 8 - test is inverted as input bits are inverted
              IF_Z      call    #dounstuff              ' 9 - we need to unstuff next bit
                        sub     bitcnt, #1 wz           ' 10
              IF_Z      jmp     #l1                     ' 11
                        movd    l1ind, bufptr           ' 12 - This saves a cycle in #l1 and does not hurt to keep repeating it
                        nop
loop1                   test    K, ina wz               ' 1 - 13 bits
                        muxz    eor, emask wc           ' 2 - top 2 bits contain previous and new values..
                        shl     eor, #1                 ' 3 - parity check 00 or 11 is odd and 01 or 10 is even
                        test    bmask, ina wz           ' 4 - Check for EOP
              IF_Z      jmp     #wfe                    ' 5
                        rcr     acc, #1                 ' 6 - rotate carry from xor into top bit of acc
                        rcl     stuff, #1               ' 7 - Bottom 7 bits are all 0 we need to unstuff
                        and     stuff, #$3F wz          ' 8 - test is inverted as input bits are inverted
              IF_Z      call    #dounstuff              ' 9 - we need to unstuff next bit
                        sub     bitcnt, #1 wz           ' 10
              IF_Z      jmp     #l1                     ' 11
                        movd    l1ind, bufptr           ' 12
                        nop
loop2                   test    K, ina wz               ' 1 - 14 bits
                        muxz    eor, emask wc           ' 2 - top 2 bits contain previous and new values..
                        shl     eor, #1                 ' 3 - parity check 00 or 11 is odd and 01 or 10 is even
                        test    bmask, ina wz           ' 4 - Check for EOP
              IF_Z      jmp     #wfe                    ' 5
                        rcr     acc, #1                 ' 6 - rotate carry from xor into top bit of acc
                        rcl     stuff, #1               ' 7 - Bottom 7 bits are all 0 we need to unstuff
                        and     stuff, #$3F wz          ' 8 - test is inverted as input bits are inverted
              IF_Z      call    #dounstuff              ' 9 - we need to unstuff next bit
                        sub     bitcnt, #1 wz           ' 10
              IF_Z      jmp     #l1                     ' 11
                        movd    l1ind, bufptr           ' 12
                        nop
                        jmp     #loop                   ' 14

' Called from the receive loop when the low six bits of stuff become zero.
' Pads with nops, samples one more bit into eor/stuff without shifting it into acc,
' sets flag_sun in flags, and sets flag_serr if the low seven bits of stuff are
' all zero afterwards. An EOP seen here also exits to wfe.
dounstuff               nop                             ' 10
                        nop                             ' 11
                        nop                             ' 12
                        nop                             ' 13
                        or      flags, flag_sun         ' 1
                        test    K, ina wz               ' 2
                        muxz    eor, emask wc           ' 3
                        shl     eor, #1                 ' 4
                        rcl     stuff, #1               ' 5
                        test    bmask, ina wz           ' 6
              IF_Z      jmp     #wfe                    ' 7
                        test    stuff, #$7F wz          ' 8
              IF_Z      or      flags, flag_serr        ' 9 - Stuffing error! 8 1's in a row!!
dounstuff_ret           ret                             ' 10

' The delays in here are critical. I'm not sure why but +/- one nop and it all
' goes to hell!
'
' wfs re-initialises the receive state, polls the bus until it reads K (both lines
' low diverts to wait_reset), then uses waitpeq/waitcnt to line up the sampling
' point before jumping into the receive loop.
wfs                     mov     bitcnt, #32             ' Wait For Start
                        mov     stuff, #$FF             ' - Stuff register is inverted
                        mov     bufptr, #buf1           ' Init indirect buffer pointer - need to check this
                        mov     eor, #0                 ' Zero xor register
                        mov     flags, zero             ' Set up status flags
:wfs0                   mov     da, ina
                        and     da, bmask wz
              IF_Z      jmp     #wait_reset             ' Both data lines low
                        cmp     da, K wz
              IF_NZ     jmp     #:wfs0                  ' K
:wfs1                   waitpeq J, #3
                        mov     delay, cnt
                        add     delay, #20              ' Delay half bit time - calcs
                        waitcnt delay, #53
                        mov     da, ina
                        and     da, bmask
                        cmp     da, K wz
              IF_NZ     jmp     #:wfs1                  ' K
                        waitcnt delay, #53
                        mov     da, ina
                        and     da, bmask
                        cmp     da, K wz
              IF_NZ     jmp     #:wfs1                  ' K
                        waitcnt delay, #54
                        jmp     #loop

' This just dumps a reset message currently - it needs to reset the address
' when we actually make a real stack and enumerate with something
'
' Counts loop passes in reset while both data lines stay low. When the lines come
' back, a count below $1FF returns to wfs unchanged; otherwise address, eps and
' state are re-initialised first.
' NOTE(review): reset is cleared before the IF_C jump, so the second
' "mov reset, #0" on the fall-through path is redundant. Left in place.
wait_reset              test    bmask, ina wz
              IF_Z      add     reset, #1
              IF_Z      jmp     #wait_reset
                        cmp     reset, #$1FF wc
                        mov     reset, #0
              IF_C      jmp     #wfs                    ' Not long enough to be a reset
                        mov     reset, #0
                        mov     address, #0
                        mov     eps, #$FF
                        mov     state, #0
                        jmp     #wfs

' End of packet: store the partially filled long and hand over to process_inbound.
wfe                                                     ' Wait For End
                        movd    :wind, bufptr
                        add     bitcnt, #3              ' Round bitcount up to compensate for
                                                        ' Dodgy timing
                        and     bitcnt, #$F8            ' Scrape off lower 3 bits
                                                        ' Shift final word to sit properly in lsb order
                        shr     acc, bitcnt
:wind                   mov     0-0, acc
' NOTE(review): the cmp below writes only C. Every jump to wfe is an IF_Z jump and
' nothing between there and here writes Z, so Z is still set and IF_BE (C or Z)
' looks always true, which would make the following "jmp #wfs" unreachable.
' Confirm on hardware before changing: making this check live may reject short
' packets that process_inbound currently handles.
                        cmp     bitcnt, #18 wc          ' Not even one full byte in
              IF_BE     jmp     #process_inbound
                        jmp     #wfs

' --------------------------------------------
' Transmit low level routines
' Transmit speed is fast by 0.625% (spec says we can be up to 1.5% so it's all good)
' Most of the transmit loop is instruction critical and it *will* lock up if you get it wrong
' If you are lucky it will lock up on a J, but if it locks up on a K or SOP it *will* take out the
' USB port and require a restart. Testing through a HUB is a good idea as you can cycle the HUB.
' --------------------------------------------

' Bitstuff the next bit
' Waits one bit period, toggles both data lines and shifts a 1 into stuff.
txs                     waitcnt delay, #53
                        xor     outa, bmask
                        shl     stuff, #1
                        or      stuff, #1
txs_ret                 ret

' Move onto the next byte but invert it first (we transmit inverted to make stuffing easy)
' reload_ind's source field is set by do_tx (movs) and incremented here after each
' byte. When bytecnt reaches zero the EOP is sent instead.
reload                  mov     bitcnt, #8
reload_ind              mov     acc, 0-0
                        sub     bytecnt, #1 wz
              IF_Z      jmp     #tx_eop                 ' Put this here rather than the end to give us
                        xor     acc, #$FF               ' A spare cycle or two ensuring the EOP waitcnt
                        add     reload_ind, #1          ' does not hang

' Bits are sent in pairs (tx1 then tx2). The bookkeeping for the second bit of each
' pair is done ahead of its waitcnt, between tx1e and tx2.
tx1                     waitcnt delay, #53
stx1                    rcr     acc, #1 wc
              IF_C      xor     outa, bmask
                        rcl     stuff, #1
                        and     stuff, #$3F wz
              IF_Z      call    #txs
tx1e                    sub     bitcnt, #1 wz           ' (Z from this sub is overwritten by the test below)
                        test    acc, #1 wz              ' Placing this here gives us a little extra
                        shl     stuff, #1               ' room in the reload routine to allow xor of
                        muxnz   stuff, #1               ' data and proper indirect loading
                        and     stuff, #$3F wz          ' Transmitting pairs of bits ping/pong buys us this
                        sub     bitcnt, #1
tx2                     waitcnt delay, #53
                        rcr     acc, #1 wc
              IF_C      xor     outa, bmask
                        tjnz    stuff, #tx2r
                        call    #txs
tx2r                    tjz     bitcnt, #reload
                        jmp     #tx1

' Send the end-of-packet: both lines low, then J, then release the drivers and
' return to wait-for-start.
tx_eop                  waitcnt delay, #53              ' This delay is *tight*
                        andn    outa, bmask             ' Start the EOP
                        mov     delay, cnt
                        add     delay, #106             ' Delay 2 bit times
                        waitcnt delay, #53
                        or      outa, J                 ' Force a J
                        waitcnt delay, #53              ' Delay one bit time... and
                        andn    dira, bmask             ' Switch off the drivers
                        andn    outa, bmask
                        jmp     #wfs

' Start a transmission.
' In:  b_tkn, b_1.. hold the bytes to send (low 8 bits of each register),
'      bytecnt = number of those bytes (the sync byte is added here).
' Enters the bit loop at stx1 with the sync byte already loaded in acc.
do_tx                   mov     bitcnt, #8
                        mov     acc, #$7F               ' Sync byte (inverted as we never xor that one)
                        movs    reload_ind, #b_tkn      ' Set up indirect reload register
                        add     bytecnt, #1             ' Add sync byte to count
                        mov     stuff, #$FF             ' Stuff register is inverted
                        andn    outa, bmask
                        or      outa, J                 ' Set pins up as a J before....
                        or      dira, bmask             ' enabling the drivers
                        mov     delay, cnt
                        add     delay, #57              ' Proper time for us to get settled
                        jmp     #stx1                   ' .... and away we go

' --------------------------------------------
' Protocol stack routines
' --------------------------------------------

' It appears that all empty frames go out as a DATA1
' Sends PID $4B followed by two zero bytes.
send_empty_data1        mov     b_1, #0
                        mov     b_2, #0
                        mov     b_tkn, #$4B
                        mov     bytecnt, #3
                        jmp     #do_tx

send_ack                mov     b_tkn, #$D2             ' PID ACK
                        mov     bytecnt, #1
                        jmp     #do_tx

send_nak                mov     b_tkn, #$5A             ' PID NAK
                        mov     bytecnt, #1
                        jmp     #do_tx

' SETUP token: remember it in state so the following data packet is treated as setup data.
pid_setup               or      state, flag_setup
                        jmp     #wfs

' OUT token: clear the setup marker in state.
pid_out                 andn    state, flag_setup
                        jmp     #wfs

' IN token: answer from the endpoint's outbound record at epb + 64 + epoint*16.
' Long 0 of the record is a byte count in the low bits plus flag bits above bit 7;
' zero means nothing to send (NAK). The flag bits are ORed into state.
' The following three longs supply bytes b_1..b_10.
' NOTE(review): bytecnt is clamped to 15 but only b_1..b_10 are loaded here, so the
' application presumably never asks for more than 10 - confirm against the caller.
pid_in                  mov     da, epoint
                        shl     da, #4                  ' Multiply by 16 (endpoint records are 16 bytes apart)
                        add     da, epb                 ' Add eps offset
                        add     da, #64                 ' Output endpoints
                        rdlong  db, da                  ' Endpoint out byte counter
                        test    db, allones wz
              IF_Z      jmp     #send_nak               ' Nothing to send
                        mov     dc, db
                        andn    dc, #$FF
                        or      state, dc               ' Set flags based on endpoint
                        test    db, flag_empty wz       ' If we need to send an empty packet...
              IF_NZ     jmp     #send_empty_data1       ' ... just do it
                        mov     bytecnt, db
                        and     bytecnt, #$0F           ' Clamp max in case app level goes haywire
                        add     da, #4                  ' Unpack the bytes from here down (inline for speed)
                        rdlong  db, da                  ' Endpoint out bytes [0-3]
                        mov     b_1, db
                        shr     db, #8
                        mov     b_2, db
                        shr     db, #8
                        mov     b_3, db
                        shr     db, #8
                        mov     b_4, db
                        add     da, #4
                        rdlong  db, da                  ' Endpoint out bytes [4-7]
                        mov     b_5, db
                        shr     db, #8
                        mov     b_6, db
                        shr     db, #8
                        mov     b_7, db
                        shr     db, #8
                        mov     b_8, db
                        add     da, #4
                        rdlong  db, da                  ' Endpoint out bytes [8-9]
                        mov     b_9, db
                        shr     db, #8
                        mov     b_10, db
                        mov     da, #1
                        shl     da, epoint
                        test    eps, da wz              ' Make sure data toggle is correct for token
              IF_NZ     mov     b_tkn, #$C3
              IF_Z      mov     b_tkn, #$4B
                        add     bytecnt, #1             ' Allow for token
                        jmp     #do_tx

' Timing here is within about 2 instructions of the absolute limit.
'
' Accepted DATA0/DATA1 packet (CRC and toggle already checked by the callers).
' If the last token was SETUP, flag_setup is added to flags and, when byte b_2 is 5,
' the low 7 bits of b_3 are saved in paddr as the pending device address.
' data_1 then copies 8 data bytes to the endpoint's inbound record at
' epb + epoint*16 and ACKs, or NAKs if long 0 of that record is still non-zero.
pid_data                test    state, flag_setup wz
              IF_Z      jmp     #data_1                 ' Is this a setup packet??
                        or      flags, flag_setup
                        mov     da, b_2
                        and     da, #$FF
                        cmp     da, #5 wz               ' Do we need to set a new Address ?
              IF_NZ     jmp     #data_1
                        mov     da, b_3
                        and     da, #$7F
                        mov     paddr, da               ' Make it pending until proper status transaction is complete
data_1                  mov     da, epoint
                        shl     da, #4                  ' Multiply by 16 (endpoint records are 16 bytes apart)
                        add     da, epb                 ' Add eps offset
                        rdlong  db, da                  ' Endpoint byte counter
                        test    db, allones wz
              IF_NZ     jmp     #send_nak               ' Other thread is not finished with this buffer
' Shuffle to convert buffers (Bytes 8&9 are CRC and we have already confirmed them)
' IN  X987 6543 210X (Byte X - next to byte 0 - is the token. Un-needed)
' OUT 7654 3210
                        mov     db, buf1
                        shr     db, #8
                        mov     dc, buf1a
                        shl     dc, #24
                        or      db, dc                  ' db is 1st long
                        mov     dd, buf1a
                        shr     dd, #8
                        mov     dc, buf1b
                        shl     dc, #24
                        or      dd, dc                  ' dd is 2nd long
' Write buffer for notice of application thread
' Always write byte count / flags last to ensure we don't try to read while
' the buffer is half written
                        add     da, #4                  ' Endpoint bytes [0-3]
                        wrlong  db, da                  ' Write 1st long (db)
                        add     da, #4                  ' Endpoint bytes [4-7]
                        sub     bytecnt, #3             ' Minus token and CRC
                        wrlong  dd, da                  ' Write 2nd long (dd)
                        sub     da, #8
                        or      bytecnt, flags
                        wrlong  bytecnt, da             ' Number of bytes in (ORed with the receive flags)
                        jmp     #send_ack

' DATA0 received: drop it silently on CRC failure; if this endpoint's bit in eps is
' already clear just ACK again, otherwise clear the bit and accept the data.
pid_data0               call    #crc16check
                        test    flags, flag_crc wz
              IF_NZ     jmp     #wfs                    ' Packet was bad
                        mov     da, #1
                        shl     da, epoint
                        mov     db, eps
                        test    db, da wz
              IF_Z      jmp     #send_ack               ' We already have this data packet
                        andn    eps, da                 ' Clear that endpoint toggle bit
                        jmp     #pid_data

' DATA1 received: mirror image of pid_data0 (bit already set -> ACK only, else set it).
pid_data1               call    #crc16check
                        test    flags, flag_crc wz
              IF_NZ     jmp     #wfs                    ' Packet was bad
                        mov     da, #1
                        shl     da, epoint
                        mov     db, eps
                        test    db, da wz
              IF_NZ     jmp     #send_ack               ' We already have this data packet
                        or      eps, da                 ' Set that endpoint toggle bit
                        jmp     #pid_data

' ACK received from the host: flip this endpoint's toggle bit, zero long 0 of the
' endpoint's outbound record, and apply the pending address if flag_address is set
' in state. In this file flag_address only reaches state through the flag bits
' copied from the outbound record in pid_in.
pid_ack                 mov     da, #1
                        shl     da, epoint
                        xor     eps, da                 ' Toggle endpoint state
                        mov     da, epoint
                        shl     da, #4                  ' Multiply by 16 (endpoint records are 16 bytes apart)
                        add     da, epb                 ' Add eps offset
                        add     da, #64                 ' Output endpoints
                        wrlong  zero, da                ' Tell other thread we are ready for more data
                        test    state, flag_address wz  ' on this endpoint
              IF_Z      jmp     #wfs
                        mov     address, paddr          ' Set the new address
                        andn    state, flag_address     ' Clear the flag baby!
                        jmp     #wfs

' This is about 3x the speed of doing it in a loop one byte at a time
' 23 instructions
' Spreads buf1/buf1a/buf1b into b_tkn, b_1..b_10, one byte position per register.
' The registers are not masked here; only their low 8 bits are the byte.
' Uses da as scratch.
unpack_buffer           mov     da, buf1
                        mov     b_tkn, da
                        shr     da, #8
                        mov     b_1, da
                        shr     da, #8
                        mov     b_2, da
                        shr     da, #8
                        mov     b_3, da
                        mov     da, buf1a
                        mov     b_4, da
                        shr     da, #8
                        mov     b_5, da
                        shr     da, #8
                        mov     b_6, da
                        shr     da, #8
                        mov     b_7, da
                        mov     da, buf1b
                        mov     b_8, da
                        shr     da, #8
                        mov     b_9, da
                        shr     da, #8
                        mov     b_10, da
unpack_buffer_ret       ret

' This simply writes the inbound usb data to the buffer in the hub
' Oh, and checks CRC's now too. They could use some optimisation!
' Mangles da, db, dc, dd, bufptr, t, t1, data
'
' Inverts the three receive longs, unpacks them, computes bytecnt and dispatches on
' the PID in b_tkn. Data and ACK PIDs are only considered when state is non-zero;
' token PIDs are only honoured when the 7-bit address in b_1 matches address.
process_inbound         xor     buf1, allones
                        xor     buf1a, allones
                        xor     buf1b, allones
                        call    #unpack_buffer
' This next bit is essential as the hub code uses it to tell how many longs to pull
' and display as this inbound message
' This calculates the number of bytes in the message
                        mov     bytecnt, bufptr
                        sub     bytecnt, #buf1
                        shl     bytecnt, #2             ' Whole longs stored * 4
                        mov     data, #$20
                        sub     data, bitcnt
                        shr     data, #3                ' Bytes in the final, partial long
                        or      bytecnt, data
                        and     b_tkn, #$FF
' data in and data out need address and endpoint of last token
' packet
                        tjz     state, #:next
                        cmp     b_tkn, #$c3 wz          ' Data0
              IF_Z      jmp     #pid_data0
                        cmp     b_tkn, #$4b wz          ' Data1
              IF_Z      jmp     #pid_data1
                        cmp     b_tkn, #$D2 wz          ' Ack
              IF_Z      jmp     #pid_ack
:next                   mov     inaddr, b_1
                        and     inaddr, #$7F
                        cmp     inaddr, address wz
              IF_NZ     jmp     #wfs                    ' Not addressed to us!
                        mov     da, b_1
                        shr     da, #7
                        mov     epoint, da
                        and     epoint, #$F
                        cmp     b_tkn, #$69 wz          ' IN
              IF_Z      jmp     #pid_in
                        cmp     b_tkn, #$E1 wz          ' OUT
              IF_Z      jmp     #pid_out
                        cmp     b_tkn, #$2D wz          ' Setup
              IF_Z      jmp     #pid_setup
                        jmp     #wfs                    ' Obviously not a valid token

' Perform a crc check on received token packets
' Mangles da, db, dc
' Returns crc
' NOTE(review): as written this routine only waits (cnt + 130) and then jumps
' straight to its ret; the CRC code after the jmp is never executed. No call to
' crc5check is visible in this file either.
crc5check               mov     delay, cnt
                        add     delay, #130
                        waitcnt delay, #0
                        jmp     #crc5check_ret
                        mov     crc, #$1F
                        mov     dc, b_1
                        and     dc, #$FF
                        call    #crc5byte
                        mov     dc, b_2
                        and     dc, #$FF
                        call    #crc5byte
                        and     crc, #$1F
                        cmp     crc, #$06 wz
              IF_NZ     or      flags, flag_crc
crc5check_ret           ret

' This is broken out so we can use it later, but then we never send tokens!
' This is horridly slow.
' Bitwise CRC update: for each of the 8 bits (LSB first) shifts crc right and XORs
' in $14 when the low bit of (data xor crc) was set. Uses da as bit counter, db as scratch.
crc5byte                                                ' dc has the byte to add to the crc
                        and     dc, #$FF
                        mov     da, #8                  ' - CRC bit counter
crc5bitloop             mov     db, dc
                        xor     db, crc
                        shr     dc, #1
                        shr     crc, #1
                        shr     db, #1 wc
              if_c      xor     crc, #$14
                        djnz    da, #crc5bitloop
crc5byte_ret            ret

' Perform a crc check on received data packets
' Mangles da, db, dc
' Returns crc
' Can optimise by removing processing the last 2 bytes and doing a compare instead
'
' Table-driven: starts from $FFFF, processes bytecnt - 1 bytes beginning at b_1
' (stepping the source field of :crc16indr), looking each step up as a word at
' Pcrcmask + 2 * ((byte xor crc) & $FF). Sets flag_crcd always, and flag_crc when
' the final value differs from crcend ($B001).
crcmask                 long    $0000FFFF
crc16check                                              ' 178 Instruction for a full buffer
                        mov     crc, crcmask
                        or      flags, flag_crcd
                        movs    :crc16indr, #b_1
                        mov     da, bytecnt
                        sub     da, #1
:crc16indr              mov     dc, 0-0
                        add     :crc16indr, #1
                        xor     dc, crc                 ' 1
                        and     dc, #$FF                ' 2
                        shl     dc, #1                  ' 3
                        add     dc, Pcrcmask            ' 4
                        rdword  db, dc                  ' 9.5
                        shr     crc, #8                 ' 10.5
                        xor     crc, db                 ' 11.5
                        djnz    da, #:crc16indr
crc16done               cmp     crc, crcend wz
              IF_NZ     or      flags, flag_crc
crc16check_ret          ret

' ---- Initialised cog data ----
deadbeef                long    $DEADBEEF
epb                     long    0                       ' Hub address of the endpoint buffers (set by Start)
debug                   long    $7FF0
dly                     long    $7FF8
reset                   long    0                       ' Loop counter used by wait_reset
allones                 long    $FFFFFFFF
zero                    long    0
K                       long    $00000002               ' Pin mask compared against ina for the K state
J                       long    $00000001               ' Pin mask for the J state
bmask                   long    $00000003               ' Both data lines
enable                  long    $00000004               ' Enable pin
emask                   long    $40000000               ' Bit muxed into eor by the receive loop

' Bits 11..15 are kept in state; bits 28..31 are receive status bits kept in flags.
flag_class              long    1 << 11
flag_setupdone          long    1 << 12
flag_address            long    1 << 13
flag_empty              long    1 << 14
flag_setup              long    1 << 15
flag_crcd               long    1 << 28
flag_serr               long    1 << 29
flag_sun                long    1 << 30
flag_crc                long    1 << 31

aring                   long    0
crcend                  long    $B001                   ' Value crc16check expects after processing data + CRC
Pcrcmask                long    0                       ' Hub address of the CRC16 table (set by Start)
ack                     long    $AAAA
nak                     long    $FFFF
acki                    long    $CCCC
mt                      long    $ABCD
oh                      long    0
one                     long    1
two                     long    2
three                   long    3

' ---- Uninitialised cog registers ----
eps                     res     1                       ' endpoint status
token                   res     1                       ' inbound token
inaddr                  res     1
address                 res     1                       ' device address
epoint                  res     1                       ' inbound endpoint
setup                   res     1
state                   res     1
paddr                   res     1                       ' Pending address, copied to address by pid_ack
t                       res     1
t1                      res     1
data                    res     1
delay                   res     1
bitcnt                  res     1
acc                     res     1
stuff                   res     1
b_tkn                   res     1
b_1                     res     1
b_2                     res     1
b_3                     res     1
b_4                     res     1
b_5                     res     1
b_6                     res     1
b_7                     res     1
b_8                     res     1
b_9                     res     1
b_10                    res     1
buf1                    res     1                       ' TX/RX Buffer
buf1a                   res     1
buf1b                   res     1
bufptr                  res     1                       ' Buffer pointer
da                      res     1
db                      res     1
dc                      res     1
dd                      res     1
leap                    res     1
eor                     res     1                       ' Receive xor register
flags                   res     1                       ' Receive status flags
bytecnt                 res     1
crc                     res     1
packetcnt               res     1
packetfail              res     1
dbg1                    res     1
dbg2                    res     1
pos                     res     1
                        fit     $1F0