Shop OBEX P1 Docs P2 Docs Learn Events
Faster Hub Update? — Parallax Forums

Faster Hub Update?

JonnyMacJonnyMac Posts: 8,924
edited 2021-05-13 22:43 in Propeller 2

I have this bit of code that transfers 14 words from the cog to the hub. It works fine, and only takes about 1.5us at 200MHz.

' Copy 14 words (word indexes 1..14 of cogarray) from cog RAM to the hub at p_hub.
update_hub      mov       idx, #1                       ' start at word 1 (word 0 is skipped)
                mov       ptrb, p_hub                   ' ptrb = hub destination
.loop           altgw     idx, #cogarray                ' aim next getword at word idx of cogarray
                getword   t1                            ' t1 = selected word
                wrword    t1, ptrb++                    ' store word in hub, advance ptrb
                incmod    idx, #14                      wc      ' idx wraps to 0 (C set) after word 14
    if_nc       jmp       #.loop                        ' C clear: more words to copy

Could this be reduced to two lines with setq and wrword?

Note: There is a cog array of 8 longs (16 words); what I want to write to the hub is 14 words (skipping the first and last).

Comments

  • TonyB_TonyB_ Posts: 2,125
    edited 2021-05-14 00:17

    Fast block moves are long-sized only: setq + rdlong/wrlong/wmlong.

  • Cluso99Cluso99 Posts: 18,069
    edited 2021-05-14 00:34

    @JonnyMac said:
    Note: There is a cog array of 8 longs (16 words); what I want to write to the hub is 14 words (skipping the first and last).

    Oops - didn't totally read this. looking again

    With your setup, probably this is going to be the best

    ' Copy words 1..14 of cogarray to the hub, using REP instead of a conditional jump.
    update_hub      mov       idx, #1           ' start at word 1
                    mov       ptrb, p_hub       ' ptrb = hub destination
                    rep       @.rep, #14        ' write 14 words to hub
                    altgw     idx, #cogarray    ' aim next getword at word idx of cogarray
                    getword   t1                ' t1 = selected word
                    wrword    t1, ptrb++        ' store word in hub, advance ptrb
                    incmod    idx, #14      wc  ' no need for the wc
    .rep                
    

    The biggest hit will be missing the hub rotation by using wrword. If the 14 hub words were long aligned instead of being offset, then packing the words in cog and using setq #7-1 + wrlong would be faster but you would need 7 more cog registers.

  • TonyB_TonyB_ Posts: 2,125
    edited 2021-05-14 00:25
            ' Copy words 1..14 of cogarray to the hub: one word, six whole longs, one word.
            ' Register operands carry no '#': an immediate would use the register's
            ' address number as the data instead of the register's contents.
            ' NOTE(review): relies on ptrb++ stepping past the whole 6-long block
            ' after the setq'd wrlong -- confirm on the target silicon.
            mov ptrb, p_hub                 ' ptrb = hub destination
            getword t1, cogarray, #1        ' t1 = word 1 (high word of first long)
            wrword  t1, ptrb++
            setq    #6-1                    ' next wrlong is a 6-long block write
            wrlong  cogarray+1, ptrb++      ' longs 1..6 of cogarray (words 2..13)
            wrword  cogarray+7, ptrb++      ' word 14 (low word of long 7)
    
  • JonnyMacJonnyMac Posts: 8,924
    edited 2021-05-14 16:54

    Thanks for the reminder about rep, Ray; that shaved off about 50 clock ticks which is not insignificant -- and the code still meets my maniacal styling preferences.

    More details for those interested: The RC receiver spits out a 32-byte packet (FlySky I-BUS). The first two bytes are a header, the next 28 are the low and high bytes of the channel values. The final two bytes are the checksum, which is $FFFF minus the sum of the previous 30 bytes.

    It's all working nicely. I'll get it written up as a Quick Byte. This is the current state of the PASM driver.

    dat { i-bus rx driver }
    
    ' Receives FlySky I-BUS frames on a smart pin, validates header and checksum,
    ' then copies the 14 channel words to the hub.
    ' Frame (32 bytes): 2 header bytes, 28 channel bytes (low/high), 2 checksum bytes;
    ' the checksum is $FFFF minus the sum of the first 30 bytes.
    ' Hub longs at ptra: [0..2] are loaded into rxd, us1, p_hub; [3] = bad frame flag,
    ' [4] = frames count, [5] = updating flag.
    
                    org
    
    entry           setq      #3-1                                  ' get 3 longs from hub
                    rdlong    rxd, ptra                             ' fills rxd, us1, p_hub
    
                    ' I-BUS rx loop
                    ' -- wait for space between frames
                    ' -- receive and store frame in cog array
    
    ibus_rx         fltl      rxd                                   ' disable smart pin
                    wrpin     #0, rxd                               ' restore tri-state mode
    
                    ' require 250 consecutive idle 1us samples before arming the receiver
    wquiet          mov       t1, #250                              ' (re)start idle timer
    .wq1            waitx     us1                                   ' wait 1us
                    testp     rxd                           wc      ' check
        if_nc       jmp       #wquiet                               ' if active, start over
                    djnz      t1, #.wq1                             ' update idle timer
    
                    wrpin     M_IBUS, rxd                           ' set to async rx
                    drvl      rxd                                   ' activate
                    mov       icog, #0                              ' initialize cog pointer
                    mov       cs, ##$FFFF                           ' initialize checksum
    
    get_ibus        testp     rxd                           wc      ' wait for a byte
        if_nc       jmp       #get_ibus
    
                    rdpin     t1, rxd                               ' read new byte
                    shr       t1, #24                               ' align lsb
                    altsb     icog, #ibusraw                        ' store in cog array
                    setbyte   t1
                    cmp       icog, #30                     wcz     ' include byte (0..29) in checksum?
        if_b        sub       cs, t1                                ' cs = $FFFF - running sum
                    incmod    icog, #31                     wc      ' update cog pointer for next byte
        if_nc       jmp       #get_ibus                             ' continue if more
    
                    mov       icog, #0                              ' word 0 = header
                    altgw     icog, #ibusraw
                    getword   t1
                    cmp       t1, ##$4020                   wcz     ' validate header
        if_ne       jmp       #bad_frame
    
                    mov       icog, #15                             ' word 15 = received checksum
                    altgw     icog, #ibusraw
                    getword   t1
                    cmp       t1, cs                        wcz     ' validate checksum
        if_ne       jmp       #bad_frame
    
                    wrlong    ##ON, ptra[5]                         ' set updating flag
    
    update_hub      mov       ptrb, p_hub                           ' point to hub
                    mov       icog, #1                              ' start with channel 1
                    rep       @.loop, #14                           ' write 14 channels
                     altgw    icog, #ibusraw                        ' get ch from cog array
                     getword  t1
                     wrword   t1, ptrb++                            ' write to hub
                     incmod   icog, #14                             ' update index
    .loop
    
                    wrlong    #OFF, ptra[3]                         ' bad frame flag off
    
                    rdlong    t1, ptra[4]                           ' update frames count
                    add       t1, #1
                    wrlong    t1, ptra[4]
    
                    wrlong    #OFF, ptra[5]                         ' clear updating flag
    
                    jmp       #ibus_rx                              ' back to top
    
    bad_frame       wrlong    ##ON, ptra[3]                         ' bad frame flag on
                    jmp       #ibus_rx                              ' back to top
    
    ' -------------------------------------------------------------------------------------------------
    
    M_IBUS          long      P_ASYNC_RX                            ' smart pin rx uart, true mode
    
    ' rxd, us1, p_hub must stay consecutive and in this order: entry loads them as one block
    rxd             res       1                                     ' rx pin
    us1             res       1                                     ' ticks in 1 microsecond
    p_hub           res       1                                     ' pointer to channels in hub
    
    icog            res       1                                     ' cog index
    ibusraw         res       8                                     ' array to hold incoming bytes
    cs              res       1                                     ' checksum of received frame
    
    t1              res       1                                     ' temp vars
    t2              res       1                                     ' (t2, t3 not referenced in this listing)
    t3              res       1
    
                    fit       496
    
  • msrobotsmsrobots Posts: 3,704
    edited 2021-05-14 05:12

    @Cluso99 said:

    @JonnyMac said:
    Note: There is a cog array of 8 longs (16 words); what I want to write to the hub is 14 words (skipping the first and last).

    Oops - didn't totally read this. looking again

    With your setup, probably this is going to be the best

    ' Copy words 1..14 of cogarray to the hub, using REP instead of a conditional jump.
    update_hub      mov       idx, #1           ' start at word 1
                    mov       ptrb, p_hub       ' ptrb = hub destination
                    rep       @.rep, #14        ' write 14 words to hub
                    altgw     idx, #cogarray    ' aim next getword at word idx of cogarray
                    getword   t1                ' t1 = selected word
                    wrword    t1, ptrb++        ' store word in hub, advance ptrb
                    incmod    idx, #14      wc  ' no need for the wc
    .rep                
    

    The biggest hit will be missing the hub rotation by using wrword. If the 14 hub words were long aligned instead of being offset, then packing the words in cog and using setq #7-1 + wrlong would be faster but you would need 7 more cog registers.

    Yeah, that makes sense. But not smart enough.

    @JonnyMac think about it this way, your cog array is not 8 longs but 9 longs.
    and you start writing/using your buffer from #cogarray word index 1 not 0.

    The first word in #cogarray is 0. Then the Header word, then your Payload of 14 words is long aligned and you can just transfer 7 longs with setq to any HUB address and meet the hub windows.

    Enjoy!

    Mike

  • JonnyMacJonnyMac Posts: 8,924
    edited 2021-05-14 16:56

    I understand. As ever, my first priority is clean code that doesn't have newcomers scratching their heads. For my own projects -- that will not be released into the general public -- I would add the extra long to the cog array and offset where the frame bytes are laid in such that channel 1 is long aligned. That's a good trick that I will keep handy, but not use in a bit of code that I will probably document for newcomers to learn from. This is for the FlySky I-BUS; I'm sure it will have some interest being the components are inexpensive. Next up is having the P2 respond to telemetry queries. I ordered some sensors so that I can watch the transactions between them and the RC receiver.

  • evanhevanh Posts: 15,187

    This should improve hub access timing:

    ' Same 14-word copy, but ALTGW itself bumps icog, so the loop body is one instruction shorter.
    update_hub      mov       ptrb, p_hub                           ' point to hub
                    mov       icog, #1                              ' start with channel 1
                    mov       t2, ##ibusraw | 1<<9                  ' ch array, and increment icog each iteration
                    rep       @.loop, #14                           ' write 14 channels
                     altgw    icog, t2                              ' get ch from cog array
                     getword  t1                                    ' t1 = word selected by altgw
                     wrword   t1, ptrb++                            ' write to hub
    .loop
    
  • Cluso99Cluso99 Posts: 18,069

    Those SkyFlys are certainly cheap enough.
    Makes my hardly used Spektrum DX6i an expensive dinosaur :(
    However, I have way too many projects to risk looking too closely!

  • @TonyB_ said:

          ' Copy words 1..14 of cogarray to the hub: one word, six whole longs, one word.
          ' Register operands carry no '#' (an immediate would be used as the data itself).
          mov ptrb, p_hub                 ' ptrb = hub destination
          getword t1, cogarray, #1        ' t1 = word 1 (high word of first long)
          wrword  t1, ptrb++
          setq    #6-1                    ' next wrlong is a 6-long block write
          wrlong  cogarray+1, ptrb++      ' longs 1..6 of cogarray (words 2..13)
          wrword  cogarray+7, ptrb++      ' word 14 (low word of long 7)
    

    What is wrong with this?

  • evanhevanh Posts: 15,187
    edited 2021-05-14 11:04

    Same thing that'll be wrong with mine - Too esoteric. Jon's making an effort to make his material educational.

  • @TonyB_ said:

    @TonyB_ said:

            ' Copy words 1..14 of cogarray to the hub: one word, six whole longs, one word.
            ' Register operands carry no '#' (an immediate would be used as the data itself).
            mov ptrb, p_hub                 ' ptrb = hub destination
            getword t1, cogarray, #1        ' t1 = word 1 (high word of first long)
            wrword  t1, ptrb++
            setq    #6-1                    ' next wrlong is a 6-long block write
            wrlong  cogarray+1, ptrb++      ' longs 1..6 of cogarray (words 2..13)
            wrword  cogarray+7, ptrb++      ' word 14 (low word of long 7)
    

    What is wrong with this?

    @evanh said:
    Same thing that'll be wrong with mine - Too esoteric. Jon's making an effort to make his material educational.

    https://charlesdickenspage.com/images/copping-twist-large.jpg

  • JonnyMacJonnyMac Posts: 8,924
    edited 2021-05-14 16:53

    @TonyB_ and @evanh: I incorporated a number of your suggestions into my S.BUS driver (the I-BUS driver came after), and thanked you for those suggestions in the released code, and publicly during my presentation for Parallax on Wednesday. I appreciate your input -- I am here to learn, too. There is nothing wrong with any of it.

    Jon's making an effort to make his material educational.

    Yes. I have a very specific, very beginner-friendly style that I seek to maintain for code that I will share through presentations or articles (Quick Bytes or Nuts & Volts). That is to say that I will often take clarity over performance for my public projects.

  • JonnyMacJonnyMac Posts: 8,924
    edited 2021-05-14 21:05

    Those SkyFlys are certainly cheap enough.

    Yes, they are. I bought one because it would do S.BUS output. One of my best friends, Rick, is an animatronics engineer and puppeteer in the movie and TV industry. ATM, he's very busy working on a couple Star Wars related TV shows. He uses his own P1 board in most of his projects, but when S.BUS was required he'd have to use an Arduino-based board that one of the other guys in the shop provides. Now he can use the P1 or P2 (he hasn't started with it yet) in S.BUS projects.

    Another benefit of FlySky is that some of the receivers have a telemetry port that has already been exploited by others (open source flight controllers). I'm going to give it a try, too. Ken Gracey has a robot boat that he likes to drive around Lake Tahoe. He's presently using XBee to return GPS coordinates -- I'm hoping to be able to send that back to the FlySky transmitter.

  • Cluso99Cluso99 Posts: 18,069

    WOW!
    Getting telemetry back adds the complete icing on that cake!!!

  • @TonyB_ said:

    @TonyB_ said:

            ' Copy words 1..14 of cogarray to the hub: one word, six whole longs, one word.
            ' Register operands carry no '#' (an immediate would be used as the data itself).
            mov ptrb, p_hub                 ' ptrb = hub destination
            getword t1, cogarray, #1        ' t1 = word 1 (high word of first long)
            wrword  t1, ptrb++
            setq    #6-1                    ' next wrlong is a 6-long block write
            wrlong  cogarray+1, ptrb++      ' longs 1..6 of cogarray (words 2..13)
            wrword  cogarray+7, ptrb++      ' word 14 (low word of long 7)
    

    What is wrong with this?

    That should be a quick byte all by itself. Beginner friendly totally has merit. People can get things going and once they do, no worries.

    However, as a general tool to understand what can be done, walking people through this code, the fast zeroing code posted earlier and other high efficiency snippets can start to deliver for those newbies as they ramp up.

    IMHO, of course.

  • JonnyMacJonnyMac Posts: 8,924
    edited 2021-05-17 20:29

    However, as a general tool to understand what can be done, walking people through this code, the fast zeroing code posted earlier and other high efficiency snippets can start to deliver for those newbies as they ramp up.

    Agreed. The "problem" is that none of the PASM experts have or are willing to take the time to teach PASM from the ground up. I do my best by including small bits of PASM in my projects and strive to make that code inviting for newcomers. Sometimes, though, I will trade super-easy, obvious code for elegant code that's easy to explain. For example, in one section of my original S.BUS driver I had something like this.

    ' Write -1 to p_floss when the tested flag is set, 0 otherwise.
    ' NOTE(review): TEST takes a mask, so #2 selects bit 1 (C = parity of flags & 2);
    ' if bit number 2 was meant, TESTB is the instruction to use -- confirm.
    check_floss     test    flags, #2                       wc
        if_c        neg     tmp1, #1                        ' flag set: tmp1 = -1
        if_nc       mov     tmp1, #0                        ' flag clear: tmp1 = 0
                    wrlong  tmp1, p_floss
    

    Tony or Evan -- I'm sorry that I don't remember which -- suggested this:

    ' Write -1 to p_floss when the tested flag is set, 0 otherwise, without branching.
    ' NOTE(review): TEST takes a mask, so #2 selects bit 1 (C = parity of flags & 2);
    ' if bit number 2 was meant, TESTB is the instruction to use -- confirm.
    check_floss     test    flags, #2                       wc
                    subx    tmp1, tmp1                      ' tmp1 - tmp1 - C: -1 if C set, else 0
                    wrlong  tmp1, p_floss
    

    While not immediately obvious to most newcomers, it's very easy to explain -- and that's a requirement for me (since I do a lot of code explaining). Interestingly, one of their sophisticated suggestions was modified by Chip while I was giving my S.BUS presentation. Again, I started out very easy:

    ' Channels 17 and 18 are single flag bits: write $7FF to the hub when the bit is
    ' set, 0 when clear. TESTB takes a bit number; TEST takes a mask, so
    ' "test flags, #0" always leaves C clear and "test flags, #1" tests bit 0.
    check_ch17      testb   flags, #0                       wc      ' C = flags bit 0
        if_c        mov     t2, ##$7FF
        if_nc       mov     t2, #0
                    wrword  t2, ptrb++                              ' ch17, advance to ch18 slot
    
    check_ch18      testb   flags, #1                       wc      ' C = flags bit 1
        if_c        mov     t2, ##$7FF
        if_nc       mov     t2, #0
                    wrword  t2, ptrb 
    

    I believe it was Evan that suggested the following change. Again, not immediately obvious to new PASM programmers, but easy to explain.

                    ' Build ch17 (low word) and ch18 (high word) in one long, then write both at once.
                    ' TESTB takes a bit number; TEST takes a mask, so "test flags, #0" always
                    ' leaves C clear and "test flags, #1" tests bit 0.
                    mov     tmp1, #0
    
    check_ch17      testb   flags, #0                       wc      ' C = flags bit 0
                    bitc    tmp1, #00 | (10 << 5)                   ' bits 0..10 = C  ($7FF or 0)
    
    check_ch18      testb   flags, #1                       wc      ' C = flags bit 1
                    bitc    tmp1, #16 | (10 << 5)                   ' bits 16..26 = C
                    wrlong  tmp1, ptrb 
    

    Chip made it even easier to understand:

                    ' Build ch17 (low word) and ch18 (high word) in one long, then write both at once.
                    ' TESTB takes a bit number; TEST takes a mask, so "test flags, #0" always
                    ' leaves C clear and "test flags, #1" tests bit 0.
                    mov     tmp1, #0
    
    check_ch17      testb   flags, #0                       wc      ' C = flags bit 0
                    bitc    tmp1, #00 addbits 10                    ' bits 0..10 = C  ($7FF or 0)
    
    check_ch18      testb   flags, #1                       wc      ' C = flags bit 1
                    bitc    tmp1, #16 addbits 10                    ' bits 16..26 = C
                    wrlong  tmp1, ptrb 
    

    We have some amazingly gifted PASM programmers in the community, but no defined path (e.g., a book) to get people at my PASM skillset (or lower) to theirs. I hate that people seem to feel slighted because I don't incorporate every one of their suggestions into my objects -- it's not a slight, it's a decision based on trying to help newcomers move from where they are to where they would like to be.

  • Cluso99Cluso99 Posts: 18,069
    edited 2021-05-17 03:41

    Jon,
    I congratulate you for keeping the code simple and easy to understand unless it's absolutely necessary for speed or space!
    It's also easier to understand by experienced programmers even if they can see better ways, or, when you go back to the code at a later date.

    BTW the above code is missing the line wrlong t2, ptrb++ in both check_ch17 sets.
    And in check_ch18 bitc is setting tmp1 but the wrlong saves tmp2.

  • evanhevanh Posts: 15,187

    Certainly not slighted from over here. I knew before I posted my tweak that it probably wouldn't be a go. I even thought about roping in the FIFO with an alternative method but that has setup and completion requirements and Tony's method is still the fastest.

                bitc    tmp1, #00 addbits 10
    

    I keep forgetting about those features in assembly. I'm more than comfortable with 10<<5.

  • The "problem" is that none of the PASM experts have or are willing to take the time to teach PASM from the ground up.

    Right. Please don't get me wrong. You are nailing it and I would never suggest you change a thing.

    Was more about mentioning an opportunity for someone to fill a gap to come upon us soon.

  • SuracSurac Posts: 176
    edited 2021-05-17 08:31
    • deleted -
  • TonyB_TonyB_ Posts: 2,125
    edited 2021-05-17 11:10

    I'm not upset my suggestion was not implemented, just a little bemused as the original question was about SETQ + WRxxxx. Below are various code options with cycle timings.

    Option 1

                                                    'cycles
    ' Option 1: REP loop, index advanced with a separate INCMOD (4 instructions per word).
    update_hub      mov     ptrb, p_hub             ' 2
                    mov     icog, #1                ' 2
                    rep     @.loop, #14             ' 2
                    altgw   icog, #ibusraw          ' 2*14
                    getword t1                      ' 2*14
                    wrword  t1, ptrb++              '(3..10)+(16-6)*7+(9-6)*6
                    incmod  icog, #14               ' 2*14
    .loop
    
    '7 longs, 181..188 cycles 
    

    Option 2

                                                    'cycles
    ' Option 2: REP loop, index advanced by ALTGW via the 1<<9 field in t2 (3 instructions per word).
    update_hub      mov     ptrb, p_hub             ' 2
                    mov     icog, #1                ' 2
                    mov     t2, ##ibusraw | 1<<9    ' 4
                    rep     @.loop, #14             ' 2
                    altgw   icog, t2                ' 2*14
                    getword t1                      ' 2*14
                    wrword  t1, ptrb++              '(3..10)+(8-4)*7+(9-4)*6
    .loop
    
    '8 longs, 127..134 cycles 
    

    Option 3

                                                    'cycles
    ' Option 3: one word, a 6-long fast block write, one word.
    ' Register operands carry no '#': an immediate would use the register's
    ' address number as the data instead of the register's contents.
    update_hub      mov     ptrb, p_hub             ' 2
                    getword t1, cogarray, #1        ' 2
                    wrword  t1, ptrb++              ' 3..10
                    setq    #6-1                    ' 2
                    wrlong  cogarray+1, ptrb++      '(8-2)+5
                    wrword  cogarray+7, ptrb++      ' 9
    
    '6 longs, 29..36 cycles 
    

    Option 4

                                                    'cycles
    ' Option 4: single 7-long fast block write; needs the 9-long cogarray layout
    ' in which the 14 payload words start long-aligned at cogarray+1.
    ' The wrlong source is a register (no '#'): an immediate would be written as data.
    update_hub      mov     ptrb, p_hub             ' 2
                    setq    #7-1                    ' 2
                    wrlong  cogarray+1, ptrb        '(3..10)+6 
    
    '3 longs, 13..20 cycles 
    

    All timings calculated, assuming p_hub long-aligned.
    Option 1 misses next egg beater slice seven times.
    Option 2 does not miss any egg beater slices.
    Option 3 shows huge time saving with fast block move.
    Option 4 requires 9-long cogarray as already mentioned.

  • SimoniusSimonius Posts: 90
    edited 2021-05-27 23:38

    deleted

Sign In or Register to comment.