Faster Hub Update?

JonnyMac · 2021-05-13 22:42

I have this bit of code that transfers 14 words from the cog to the hub. It works fine, and only takes about 1.5us at 200MHz.

update_hub      mov       idx, #1
                mov       ptrb, p_hub
.loop           altgw     idx, #cogarray
                getword   t1
                wrword    t1, ptrb++
                incmod    idx, #14                      wc
    if_nc       jmp       #.loop

Could this be reduced to two lines with setq and wrword?

Note: There is a cog array of 8 longs (16 words); what I want to write to the hub is 14 words (skipping the first and last).

TonyB_ · 2021-05-13 23:53

Fast block moves are long-sized only: setq + rdlong/wrlong/wmlong.

Cluso99 · 2021-05-13 23:57

@JonnyMac said:
Note: There is a cog array of 8 longs (16 words); what I want to write to the hub is 14 words (skipping the first and last).

Oops - didn't totally read this. looking again

With your setup, probably this is going to be the best

update_hub      mov       idx, #1
                mov       ptrb, p_hub
                rep       @.rep, #14        ' write 14 words to hub
                altgw     idx, #cogarray
                getword   t1
                wrword    t1, ptrb++
                incmod    idx, #14      wc  ' no need for the wc
.rep

The biggest hit will be missing the hub rotation by using wrword. If the 14 hub words were long aligned instead of being offset, then packing the words in cog and using setq #7-1 + wrlong would be faster but you would need 7 more cog registers.

TonyB_ · 2021-05-14 00:06

        mov ptrb, p_hub
        getword t1, #cogarray, #1
        wrword  t1, ptrb++
        setq    #6-1
        wrlong  #cogarray+1, ptrb++
        wrword  #cogarray+7, ptrb++

JonnyMac · 2021-05-14 02:44

Thanks for the reminder about rep, Ray; that shaved off about 50 clock ticks which is not insignificant -- and the code still meets my maniacal styling preferences.

More details for those interested: The RC receiver spits out a 32-byte packet (FlySky I-BUS). The first two bytes are a header, the next 28 are the low and high bytes of the channel values. The final two bytes are the checksum, which is $FFFF minus the sum of the previous 30 bytes.

It's all working nicely. I'll get it written up as a Quick Byte. This is the current state of the PASM driver.

dat { i-bus rx driver }

' I-BUS receive driver (PASM2).
'
' ptra is used as the base of a hub parameter block:
'   ptra[0..2]  read once at entry into rxd, us1, p_hub
'   ptra[3]     bad-frame flag  (ON after a header/checksum failure, OFF after a good frame)
'   ptra[4]     frames count    (incremented after each good frame)
'   ptra[5]     updating flag   (ON while the 14 channel words are being written to p_hub)
'
' A frame is 32 bytes collected into the 8-long cog array ibusraw:
'   word 0      header, must equal $4020
'   words 1..14 channel values, copied to the hub at p_hub
'   word 15     checksum, must equal $FFFF minus the sum of bytes 0..29

                org

entry           setq      #3-1                                  ' get 3 longs from hub (rxd, us1, p_hub)
                rdlong    rxd, ptra

                ' I-BUS rx loop
                ' -- wait for space between frames
                ' -- receive and store frame in cog array

ibus_rx         fltl      rxd                                   ' disable smart pin
                wrpin     #0, rxd                               ' restore tri-state mode

wquiet          mov       t1, #250                              ' (re)start idle timer
.wq1            waitx     us1                                   ' wait 1us
                testp     rxd                           wc      ' check
    if_nc       jmp       #wquiet                               ' if active, start over
                djnz      t1, #.wq1                             ' update idle timer

                wrpin     M_IBUS, rxd                           ' set to async rx
                drvl      rxd                                   ' activate
                mov       icog, #0                              ' initialize byte index into ibusraw
                mov       cs, ##$FFFF                           ' initialize checksum

get_ibus        testp     rxd                           wc      ' wait for a byte
    if_nc       jmp       #get_ibus

                rdpin     t1, rxd                               ' read new byte
                shr       t1, #24                               ' align lsb
                altsb     icog, #ibusraw                        ' store in cog array (ibusraw)
                setbyte   t1
                cmp       icog, #30                     wcz     ' include byte (0..29) in checksum?
    if_b        sub       cs, t1
                incmod    icog, #31                     wc      ' update cog pointer for next byte
    if_nc       jmp       #get_ibus                             ' continue if more

                mov       icog, #0                              ' word 0 of ibusraw = header
                altgw     icog, #ibusraw
                getword   t1
                cmp       t1, ##$4020                   wcz     ' validate header
    if_ne       jmp       #bad_frame

                mov       icog, #15                             ' word 15 of ibusraw = received checksum
                altgw     icog, #ibusraw
                getword   t1
                cmp       t1, cs                        wcz     ' validate checksum
    if_ne       jmp       #bad_frame

                wrlong    ##ON, ptra[5]                         ' set updating flag

update_hub      mov       ptrb, p_hub                           ' point to hub
                mov       icog, #1                              ' start with channel 1
                rep       @.loop, #14                           ' write 14 channels
                 altgw    icog, #ibusraw                        ' get ch from cog array
                 getword  t1
                 wrword   t1, ptrb++                            ' write to hub
                 incmod   icog, #14                             ' update index
.loop

                wrlong    #OFF, ptra[3]                         ' bad frame flag off

                rdlong    t1, ptra[4]                           ' update frames count
                add       t1, #1
                wrlong    t1, ptra[4]

                wrlong    #OFF, ptra[5]                         ' clear updating flag

                jmp       #ibus_rx                              ' back to top

bad_frame       wrlong    ##ON, ptra[3]                         ' bad frame flag on
                jmp       #ibus_rx                              ' back to top

' -------------------------------------------------------------------------------------------------

M_IBUS          long      P_ASYNC_RX                            ' smart pin rx uart, true mode

' The next three longs are filled, in this order, by the setq/rdlong at entry.

rxd             res       1                                     ' rx pin
us1             res       1                                     ' ticks in 1 microsecond
p_hub           res       1                                     ' pointer to channels in hub

icog            res       1                                     ' cog index
ibusraw         res       8                                     ' array to hold incoming bytes (8 longs = 32 bytes)
cs              res       1                                     ' checksum of received frame

t1              res       1                                     ' temp vars
t2              res       1
t3              res       1

                fit       496

msrobots · 2021-05-14 05:08

@Cluso99 said:

@JonnyMac said:
Note: There is a cog array of 8 longs (16 words); what I want to write to the hub is 14 words (skipping the first and last).

Oops - didn't totally read this. looking again

With your setup, probably this is going to be the best
update_hub      mov       idx, #1
                mov       ptrb, p_hub
                rep       @.rep, #14        ' write 14 words to hub
                altgw     idx, #cogarray
                getword   t1
                wrword    t1, ptrb++
                incmod    idx, #14      wc  ' no need for the wc
.rep                
The biggest hit will be missing the hub rotation by using wrword. If the 14 hub words were long aligned instead of being offset, then packing the words in cog and using setq #7-1 + wrlong would be faster but you would need 7 more cog registers.

Yeah, that makes sense. But not smart enough.

@JonnyMac think about it this way, your cog array is not 8 longs but 9 longs.
and you start writing/using your buffer from #cogarray word index 1 not 0.

The first word in #cogarray is 0. Then the Header word, then your Payload of 14 words is long aligned and you can just transfer 7 longs with setq to any HUB address and meet the hub windows.

Enjoy!

Mike

JonnyMac · 2021-05-14 05:53

I understand. As ever, my first priority is clean code that doesn't have newcomers scratching their heads. For my own projects -- that will not be released into the general public -- I would add the extra long to the cog array and offset where the frame bytes are laid in such that channel 1 is long aligned. That's a good trick that I will keep handy, but not use in a bit of code that I will probably document for newcomers to learn from. This is for the FlySky I-BUS; I'm sure it will have some interest being the components are inexpensive. Next up is having the P2 respond to telemetry queries. I ordered some sensors so that I can watch the transactions between them and the RC receiver.

evanh · 2021-05-14 08:57

This should improve hub access timing:

update_hub      mov       ptrb, p_hub                           ' point to hub
                mov       icog, #1                              ' start with channel 1
                mov       t2, ##ibusraw | 1<<9                  ' ch array, and increment icog each iteration
                rep       @.loop, #14                           ' write 14 channels
                 altgw    icog, t2                              ' get ch from cog array
                 getword  t1
                 wrword   t1, ptrb++                            ' write to hub
.loop

Cluso99 · 2021-05-14 08:59

Those SkyFlys are certainly cheap enough.
Makes my hardly used Spektrum DX6i an expensive dinosaur
However, I have way too many projects to risk looking too closely!

TonyB_ · 2021-05-14 10:20

@TonyB_ said:

      mov ptrb, p_hub
      getword t1, #cogarray, #1
      wrword  t1, ptrb++
      setq    #6-1
      wrlong  #cogarray+1, ptrb++
      wrword  #cogarray+7, ptrb++

What is wrong with this?

evanh · 2021-05-14 11:03

Same thing that'll be wrong with mine - Too esoteric. Jon's making an effort to make his material educational.

Yanomani · 2021-05-14 11:28

@TonyB_ said:
@TonyB_ said:
        mov ptrb, p_hub
        getword t1, #cogarray, #1
        wrword  t1, ptrb++
        setq    #6-1
        wrlong  #cogarray+1, ptrb++
        wrword  #cogarray+7, ptrb++
What is wrong with this?

@evanh said:
Same thing that'll be wrong with mine - Too esoteric. Jon's making an effort to make his material educational.

https://charlesdickenspage.com/images/copping-twist-large.jpg

JonnyMac · 2021-05-14 14:58

@TonyB_ and @evanh: I incorporated a number of your suggestions into my S.BUS driver (the I-BUS driver came after), and thanked you for those suggestions in the released code, and publicly during my presentation for Parallax on Wednesday. I appreciate your input -- I am here to learn, too. There is nothing wrong with any of it.

Jon's making an effort to make his material educational.

Yes. I have a very specific, very beginner-friendly style that I seek to maintain for code that I will share through presentations or articles (Quick Bytes or Nuts & Volts). That is to say that I will often take clarity over performance for my public projects.

JonnyMac · 2021-05-14 15:09

Those SkyFlys are certainly cheap enough.

Yes, they are. I bought one because it would do S.BUS output. One of my best friends, Rick, is an animatronics engineer and puppeteer in the movie and TV industry. ATM, he's very busy working on a couple Star Wars related TV shows. He uses his own P1 board in most of his projects, but when S.BUS was required he'd have to use an Arduino-based board that one of the other guys in the shop provides. Now he can use the P1 or P2 (he hasn't started with it yet) in S.BUS projects.

Another benefit of FlySky is that some of the receivers have a telemetry port that has already been exploited by others (open source flight controllers). I'm going to give it a try, too. Ken Gracey has a robot boat that he likes to drive around Lake Tahoe. He's presently using XBee to return GPS coordinates -- I'm hoping to be able to send that back to the FlySky transmitter.

Cluso99 · 2021-05-14 19:12

WOW!
Getting telemetry back adds the complete icing on that cake!!!

potatohead · 2021-05-16 19:50

@TonyB_ said:

        mov ptrb, p_hub
        getword t1, #cogarray, #1
        wrword  t1, ptrb++
        setq    #6-1
        wrlong  #cogarray+1, ptrb++
        wrword  #cogarray+7, ptrb++

What is wrong with this?

That should be a quick byte all by itself. Beginner friendly totally has merit. People can get things going and once they do, no worries.

However, as a general tool to understand what can be done, walking people through this code, the fast zeroing code posted earlier and other high efficiency snippets can start to deliver for those newbies as they ramp up.

IMHO, of course.

JonnyMac · 2021-05-16 21:33

However, as a general tool to understand what can be done, walking people through this code, the fast zeroing code posted earlier and other high efficiency snippets can start to deliver for those newbies as they ramp up.

Agreed. The "problem" is that none of the PASM experts have or are willing to take the time to teach PASM from the ground up. I do my best by including small bits of PASM in my projects and strive to make that code inviting for newcomers. Sometimes, though, I will trade super-easy, obvious code for elegant code that's easy to explain. For example, in one section of my original S.BUS driver I had something like this.

check_floss     test    flags, #2                       wc
    if_c        neg     tmp1, #1
    if_nc       mov     tmp1, #0
                wrlong  tmp1, p_floss

Tony or Evan -- I'm sorry that I don't remember which -- suggested this:

check_floss     test    flags, #2                       wc      ' C = parity of (flags & %10), i.e. flags bit 1
                ' NOTE(review): #2 here is a mask, not a bit number; if bit number 2 was intended this should be testb -- confirm
                subx    tmp1, tmp1                              ' tmp1 = tmp1 - tmp1 - C  ->  -1 if C set, else 0
                wrlong  tmp1, p_floss                           ' write result to the hub address held in p_floss

While not immediately obvious to most newcomers, it's very easy to explain -- and that's a requirement for me (since I do a lot of code explaining). Interestingly, one of their sophisticated suggestions was modified by Chip while I was giving my S.BUS presentation. Again, I started out very easy:

' Write ch17 and ch18 to the hub as words: $7FF when the channel's bit in
' flags is set, 0 when it is clear.
' TESTB takes a bit number; TEST takes a mask, and a mask of #0 always leaves C clear.

check_ch17      testb   flags, #0                       wc      ' C = flags bit 0
    if_c        mov     t2, ##$7FF
    if_nc       mov     t2, #0
                wrword  t2, ptrb++                              ' ch17, advance to ch18 slot

check_ch18      testb   flags, #1                       wc      ' C = flags bit 1
    if_c        mov     t2, ##$7FF
    if_nc       mov     t2, #0
                wrword  t2, ptrb                                ' ch18 (last word, no increment)

I believe it was Evan that suggested the following change. Again, not immediately obvious to new PASM programmers, but easy to explain.

                mov     tmp1, #0                                ' both channel words start at 0

check_ch17      testb   flags, #0                       wc      ' C = flags bit 0 (TEST with mask #0 would always clear C)
                bitc    tmp1, #00 | (10 << 5)                   ' bits 0..10 = C   -> low word is $7FF or 0

check_ch18      testb   flags, #1                       wc      ' C = flags bit 1
                bitc    tmp1, #16 | (10 << 5)                   ' bits 16..26 = C  -> high word is $7FF or 0
                wrlong  tmp1, ptrb                              ' write both words with a single hub access

Chip made it even easier to understand:

                mov     tmp1, #0                                ' both channel words start at 0

check_ch17      testb   flags, #0                       wc      ' C = flags bit 0 (TEST with mask #0 would always clear C)
                bitc    tmp1, #00 addbits 10                    ' bit 0 plus 10 more = C   -> low word is $7FF or 0

check_ch18      testb   flags, #1                       wc      ' C = flags bit 1
                bitc    tmp1, #16 addbits 10                    ' bit 16 plus 10 more = C  -> high word is $7FF or 0
                wrlong  tmp1, ptrb                              ' write both words with a single hub access

We have some amazingly gifted PASM programmers in the community, but no defined path (e.g., a book) to get people at my PASM skillset (or lower) to theirs. I hate that people seem to feel slighted because I don't incorporate every one of their suggestions into my objects -- it's not a slight, it's a decision based on trying to help newcomers move from where they are to where they would like to be.

Cluso99 · 2021-05-17 03:39

Jon,
I congratulate you for keeping the code simple and easy to understand unless it's absolutely necessary for speed or space!
It's also easier to understand by experienced programmers even if they can see better ways, or, when you go back to the code at a later date.

BTW the above code is missing the line wrlong t2, ptrb++ in both check_ch17 sets.
And in check_ch18 bitc is setting tmp1 but the wrlong saves tmp2.

evanh · 2021-05-17 06:03

Certainly not slighted from over here. I knew before I posted my tweak that it probably wouldn't be a go. I even thought about roping in the FIFO with an alternative method but that has setup and completion requirements and Tony's method is still the fastest.

            bitc    tmp1, #00 addbits 10

I keep forgetting about those features in assembly. I'm more than comfortable with 10<<5.

potatohead · 2021-05-17 06:10

The "problem" is that none of the PASM experts have or are willing to take the time to teach PASM from the ground up.

Right. Please don't get me wrong. You are nailing it and I would never suggest you change a thing.

Was more about mentioning an opportunity for someone to fill a gap to come upon us soon.

Surac · 2021-05-17 07:16

deleted -

TonyB_ · 2021-05-17 10:21

I'm not upset my suggestion was not implemented, just a little bemused as the original question was about SETQ + WRxxxx. Below are various code options with cycle timings.

Option 1

                                                'cycles
update_hub      mov     ptrb, p_hub             ' 2
                mov     icog, #1                ' 2
                rep     @.loop, #14             ' 2
                altgw   icog, #ibusraw          ' 2*14
                getword t1                      ' 2*14
                wrword  t1, ptrb++              '(3..10)+(16-6)*7+(9-6)*6
                incmod  icog, #14               ' 2*14
.loop

'7 longs, 181..188 cycles

Option 2

                                                'cycles
update_hub      mov     ptrb, p_hub             ' 2
                mov     icog, #1                ' 2
                mov     t2, ##ibusraw | 1<<9    ' 4
                rep     @.loop, #14             ' 2
                altgw   icog, t2                ' 2*14
                getword t1                      ' 2*14
                wrword  t1, ptrb++              '(3..10)+(8-4)*7+(9-4)*6
.loop

'8 longs, 127..134 cycles

Option 3

                                                'cycles
update_hub      mov     ptrb, p_hub             ' 2
                getword t1, #cogarray, #1       ' 2
                wrword  t1, ptrb++              ' 3..10
                setq    #6-1                    ' 2
                wrlong  #cogarray+1, ptrb++     '(8-2)+5
                wrword  #cogarray+7, ptrb++     ' 9

'6 longs, 29..36 cycles

Option 4

                                                'cycles
update_hub      mov     ptrb, p_hub             ' 2
                setq    #7-1                    ' 2
                wrlong  #cogarray+1, ptrb       '(3..10)+6 

'3 longs, 13..20 cycles

All timings calculated, assuming p_hub long-aligned.
Option 1 misses next egg beater slice seven times.
Option 2 does not miss any egg beater slices.
Option 3 shows huge time saving with fast block move.
Option 4 requires 9-long cogarray as already mentioned.

Simonius · 2021-05-17 13:48

deleted

Faster Hub Update?

Comments