;++ ; MACRO64$PI.M64 -- Based on original algorithm and C program by Remy Dube. ; ; © Digital Equipment Corporation 1992, 1993. All rights reserved. ; ; Restricted Rights: Use, duplication, or disclosure by the ; U.S. Government is subject to restrictions as set forth in ; subparagraph (c) (1) (ii) of DFARS 252.227-7013, or in ; FAR 52.227-19, or in FAR 52.227-14 Alt. III, as applicable. ; ; This software is proprietary to and embodies the confidential ; technology of Digital Equipment Corporation. Possession, use, or ; copying of this software and media is authorized only pursuant to ; a valid written license from Digital or an authorized sublicensor. ; ; This is a complete MACRO-64 program which computes PI to a specified ; number of digits. This example is one of three installed with MACRO-64: ; ; MACRO64$HELLO.M64 - Simple Hello World program ; Demonstrates calling standard macros ; MACRO64$WHAMI.M64 - Program that displays WHAMI IPR ; Demonstrates system calls ; MACRO64$PI.M64 - Program that computes PI (this program) ; Demonstrates general programming with ; MACRO-64 along with optimization techniques ; ; Digital suggests you become familiar with the concepts illustrated in ; the MACRO64$HELLO.M64 example and the MACRO64$WHAMI.M64 example before ; you attempt to understand this example example. ; ; ; The original algorithm implemented by this program is by Remy Dube. ; This MACRO-64 implementation is adapted from Remy's C implementation. ; While the algorithm and implementation are reasonably fast and accurate, ; the intent of this program is to illustrate MACRO-64 programming ; concepts rather than set world speed or precision records. This program ; illustrates the use of a number of optimization techniques, such as loop ; unrolling and routine inlining. In several instances, the context in ; which these techniques are applied causes the technique to yield only a ; marginal benefit. Furthermore, due to the decreased development and ; maintenance costs and increased portability associated with High Order ; Languages, it would not normally be advisable to write an entire program ; such as this in assembly language. Nonetheless, this program serves to ; illustrate a number of paradigms applicable to efficient programming in ; MACRO-64. ; ; This program calls the DEC C Runtime Library to perform I/O and for ; other ancillary tasks. Note that the DEC C Runtime Library is available ; on all OpenVMS AXP systems, regardless of whether you have purchased and ; installed the DEC C compiler product. ; ; To run this program on an OpenVMS AXP system, use the following commands: ; ; $ macro/alpha_axp/object=pi sys$examples:macro64$pi ; $ link pi ; $ run pi ;-- ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; ASSEMBLY-TIME CONSTANTS ;--------------------------------------------------------------------------- ; Turn on optimizations and automatic alignment. .enable align_code, align_data, peephole, schedule PRINT_WIDTH = 100 ; Width of output device BIGINT_BYTES = 8 ; Number of bytes in a quadword BIGINT_SHIFTER = 3 ; L-Shift this much to convert array index to byte offset BIGINT_BITS = BIGINT_BYTES * 8 ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; MODULE VARIABLE DATA ; --------------------------------------------------------------------------- $data_section WEIGHT: .quad 0 FILE_VAR: .quad 0 LOG_4: .g_floating 0.0 PRECISION: .quad 0 IMAX: .quad 0 CLUSTER_SIZE: .quad 0 CLUSTER_VECT: .quad 0 CLUSTER_VECT_SIZE: .quad 0 ISTOP: .quad 0 START: .quad 0 FINISH: .quad 0 DURATION: .quad 0 SCRATCH: .byte %repeat(99,<0,>)0 CLK_TCK: .g_floating 100.0 ; From DECC$LIBRARY_INCLUDE:TIME.H G1: .g_floating 1.0 G4: .g_floating 4.0 ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; UTILITY MACROS ;--------------------------------------------------------------------------- ;++ ; Macro IPOW ; Abstract: Integer power function for assembly-time values. ; Inputs: A - Value to be raised to a power ; B - Power to which to raise value ; Method: Multiply A times itself B times. ;-- .macro IPOW A B RESULT RESULT = %repeat(<>,<*>) 1 .endm IPOW ; The LOOP_UNROLL assembly-time variable is used to control manual loop ; unrolling: Use 0 For no loop unrolling, 1 for normal loop unrolling, ; or a value greater than 1 for a higher, global unroll factor. LOOP_UNROLL = 1 ;++ ; Macro UNROLL ; Abstract: ; Macro UNROLL is used to assist in in manually unrolling a loop. ; It can be beneficial to repeat the body of a loop a number of times ; with branch-outs between each repetition to the loop exit. At ; the least, you gain the benefit of a a branch not taken vs. a branch ; taken. While this effect may not gain very much, when loop unrolling ; is combined with instruction scheduling (.ENABLE SCHEDULE), you may ; be able to schedule portions successive repetitions of the loop in ; parallel. See Appendix B of the MACRO-64 Assembler for OpenVMS AXP ; Systems Reference Manual for more information on this optimization ; technique. ; ; To use UNROLL, you must define a macro that expands to the body of the ; loop. Specify the name of this macro with the BODY argument. You ; must also specify 2 branch instruction statements: 1 that tests a ; condition and branches to the beginning of the loop to continue, and ; another that tests the opposite condition and branches to the end of the ; loop to exit. Specify the loop continuation statement with the ; CONTINUE argument. Specify the loop exit statement with the EXIT ; argument. Specify the target label for the loop-continue statement ; as %%LOOP_START%% and the target for the loop-exit statement as ; %%LOOP_END%%. For instance, the continue statement might be a BNE and ; the end statement would be BEQ, or vice versa. Often, you must precede ; the branch instruction with a comparison instruction. If so, specify ; the comparison instruction statement with the COMPARE argument. ; ; You can use the TEST_FIRST argument to specify the style of the loop. ; Specify 1 to get generate an exit test prior to the first iteration of ; the loop. Specify 0 to always execute the loop at least once. ; ; You can experiment with different values for the FACTOR argument. A ; FACTOR of 0 or 1 results in a single copy of the loop body -- that is, no ; loop unrolling. FACTORs greater than 1 can improve runtime performance ; at the expense of increased code size. However, depending on the ; size of the loop body, too large a FACTOR can cause the repeated loop ; body to overflow the instruction cache, thus negatively impacting ; performance. ; ; In addition, if the loop body contains a call to another routine, ; it is unlikely that the scheduler (.ENABLE SCHEDULE) will be able to ; concurrently schedule successive repetitions of the loop body. As a ; result, the benefit realized by unrolling the loop will likely ; reduce to the difference in cost between a branch taken and a branch ; not taken times the FACTOR you specify. ; ; Thus, the best loops to unroll are those that are in your critical ; performance path, contain a small number of instructions, and do not ; call other routines. ; ; Inputs: ; BODY - Name of the macro that expands to the loop body ; FACTOR - Specific repetition factor to use when unrolling the loop ; CONTINUE - A conditional branch instruction statement to continue the loop ; EXIT - A conditional branch instruction statement to exit the loop ; COMPARE - An optional comparison instruction to use prior to the CONTINUE ; or EXIT instruction ; TEST_FIRST - non-zero means test the exit condition prior to the 1st ; iteration, zero means always execute the loop at least once ; UNROLL_LOOP (assembly variable) - Controls whether or not to unroll loops ; or whether to increase the unroll factor for all unrolled loops ; ; Outputs: ; The body of the loop is repeated FACTOR*UNROLL_LOOP times. ; ; Example usage: ; ; ; First define the loop body as a macro ; .macro LOOP_BODY ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; .endm LOOP_BODY ; ; ; Now invoke the UNROLL macro to unroll the loop ; UNROLL BODY=LOOP_BODY, FACTOR=3, TEST_FIRST=1, - ; COMPARE = , - ; CONTINUE = , - ; EXIT = ; ; ; The above use of UNROLL would result in the following code ; ; being generated (where LOOP_START and LOOP_END are unique labels ; ; for each invocation of UNROLL): ; ; cmple r0, r2, r22 ; These 2 instructions are suppressed ; beq r22, LOOP_END ; if you specify TEST_FIRST=0 ; LOOP_START: ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; cmple r0, r2, r22 ; beq r22, LOOP_END ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; cmple r0, r2, r22 ; beq r22, LOOP_END ; ldq r0, (r5) ; lda r5, 8(r5) ; lda r1, 1(r1) ; cmple r0, r2, r22 ; bne r22, LOOP_START ; LOOP_END: ;-- .macro UNROLL BODY, FACTOR, CONTINUE, EXIT, COMPARE, TEST_FIRST=1 .if not_defined, UNROLL_INDEX UNROLL_INDEX = 0 .else UNROLL_INDEX = UNROLL_INDEX + 1 .endc LOOP_START = "UNROLL_LOOP_START_%integer(UNROLL_INDEX)" LOOP_END = "UNROLL_LOOP_END_%integer(UNROLL_INDEX)" UNROLL_HELPER , , <%LOOP_START%>, <%LOOP_END%>, - , , , .endm UNROLL ; Helper macro for loop unrolling .macro UNROLL_HELPER BODY, FACTOR, START, END, CONTINUE, EXIT, COMPARE, - TEST_FIRST .if ne, COMPARE EXIT .endc START: .repeat <-1> * LOOP_UNROLL BODY COMPARE EXIT .endr BODY COMPARE CONTINUE END: .endm UNROLL_HELPER ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; MODULE LOCAL ROUTINES ;--------------------------------------------------------------------------- $routine ASK_NUMBER_OF_DIGITS, kind=stack, local=true, - data_section_pointer=true, - saved_regs= ;++ ; Abstract: Inquire from the user terminal who many digits to compute. ; Inputs: Terminal I/O ; Outputs: PRECISION - Number of digits to compute, also returned in R0 ;-- $linkage_section 10$: .asciz "How many digits do you want to compute? " 20$: .asciz "Computing PI with %-d digits \x0A\x0A" 30$: .asciz "Please specify a positive, non-zero integer value.\x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect 100$: $call DECC$GPRINTF, args=10$/a ; Print "How many?" $call DECC$GETS, args=SCRATCH/a ; Get answer bne r0, 200$ ; Branch if not ^Z stq r0, PRECISION br 400$ 200$: $call DECC$ATOL, args=SCRATCH/a ; Convert to longword bgt r0, 300$ $call DECC$GPRINTF, args=30$/a ; Print "be positive" br 100$ 300$: stq r0, PRECISION ; Set precision ; Output "Computing with N digits" $call DECC$GPRINTF, args=<20$/a, r0/l>, scratch_regs= ldq r0, PRECISION 400$: $return $end_routine ASK_NUMBER_OF_DIGITS $routine INITIALIZE, kind=stack, local=true, - saved_regs=, - data_section_pointer=true ;++ ; Abstract: Start up processing. ; Inputs: ; PRECISION ; Outputs: ; LOG_4 - log10(4.0) ; IMAX - PRECISION/LOG_4 ; WEIGHT - Scale factor for various computations ; CLUSTER_SIZE - Number of decimal digits represented in a quadword ; CLUSTER_VECT_SIZE - Number of quadwords needed for spec'd precision ; CLUSTER_VECT - Ptr to array of quadwords ; CLUSTER_VECT[0..last] - Initialized to 0 ; FILE_VAR - Opened for output to PI.DAT ;-- $linkage_section .define_ireg POWX r4 ; r4 holds pow(2,BIGINT_BITS-4)/IMAX*10 .define_ireg W r5 ; r5 is temp for WEIGHT .define_ireg CS r6 ; r6 is temp for CLUSTER_SIZE .define_ireg CVS r7 ; r7 is temp for CLUSTER_VECT_SIZE IPOW 2 T ; T=pow(2,BIGINT_BITS-4) 10$: .quad T 20$: .asciz "pi.dat" 30$: .asciz "w" 40$: .asciz " PI WITH %-d digits \x0A \x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect .base r3, $ds ; ...data section $call DECC$GLOG10, args=G4/g ; r0=LOG10(4.0) stg f0, LOG_4 ; LOG_4=LOG10(4.0) ldt f1, PRECISION ; Convert precision to... cvtqg f1, f1 ; ...G float divg f1, f0, f1 ; f1=PRECISION/LOG_4 cvtgq/c f1, f1 ; Convert to quadword stt f1, IMAX ; IMAX=PRECISION/LOG_4 ldq $IA1, IMAX ; IA1=IMAX mulq $IA1, #10, $IA1 ; IA1=IMAX*10 ;++ ; The OTS$DIV_L routine takes 2 quad word arguments and returns ; in r0 the quadword quotient of the first argument divided by the ; second argument. OTS$DIV_L is in the sharable library LIBOTS.EXE, ; which is available at link time by default. A similar routine, ; OTS$DIV_I, is available in that same library for longword integer ; division. ;-- ; r0=pow(2,BIGINT_BITS-4)/(IMAX*10) $call OTS$DIV_L, args=<10$/q, $IA1/q> ; POWX=pow(2,BIGINT_BITS-4)/(IMAX*10) mov r0, POWX mov 1, W ; WEIGHT=1 mov 0, CS ; CLUSTER_SIZE=0 ; Loop computing WEIGHT and CLUSTER_SIZE .macro LOOP_BODY ; Loop body for unrolling mulq W, #10, W ; WEIGHT=WEIGHT*10 lda CS, 1(CS) ; CLUSTER_SIZE=CLUSTER_SIZE+1 .endm LOOP_BODY UNROLL BODY=LOOP_BODY, FACTOR=10, TEST_FIRST=1, - COMPARE = , - CONTINUE = , - EXIT = stq W, WEIGHT ; Store local copies of stq CS, CLUSTER_SIZE ; WEIGHT and CLUSTER_SIZE ldq $IA0, PRECISION ; IA0=PRECISION addq CS, $IA0, $IA0 ; IA0=PRECISION+CLUSTER_SIZE lda $IA0, -1($IA0) ; IA0=PRECISION+CLUSTER_SIZE-1 ; r0=(PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE $call OTS$DIV_L, args=<$IA0/q,CS/q> ; r0=((PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE)+1 lda r0, 1(r0) ; CLUSTER_VECT_SIZE=((PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE)+1 mov r0, CVS stq r0, CLUSTER_VECT_SIZE ; Allocate CLUSTER_VECT sll r0, #BIGINT_SHIFTER, r0 $call DECC$MALLOC, args=r0/l, scratch_regs= stq r0, CLUSTER_VECT .define_ireg CCA r0 ; r0 is current cluster addr ; Initialize CLUSTER_VECT mov CVS, r1 ; r1=CLUSTER_VECT_SIZE 100$: stq r31, (CCA) ; *CCA = 0 lda r1, -1(r1) ; --r1 lda r0, BIGINT_BYTES(R0) ; CCA++ bgt r1, 100$ ; Branch if not done $call DECC$FOPEN, args=<20$/a, 30$/a> ; Open output file stq r0, FILE_VAR $call DECC$GFPRINTF, args=, - scratch_regs= $return .undefine_reg POWX .undefine_reg W .undefine_reg CS .undefine_reg CVS .undefine_reg CCA $end_routine INITIALIZE $routine MULTIPLY_AND_DIVIDE, kind=stack, - saved_regs=, - data_section_pointer=true, - local=true ;++ ; Abstract: Walk the CLUSTER_VECTOR array performing a series of ; multiplications and divisions. ; Inputs: ; IA0 - NUMERATOR ; IA1 - DENOMINATOR ; ISTOP - Indicator of how much to compute ; WEIGHT ; CLUSTER_VECT ; CLUSTER_VECT[0..ISTOP] ; Outputs: ; CLUSTER_VECT[0..ISTOP] ;-- .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect .define_ireg TEMP r4 ; r4 is variable 'temp' .define_ireg I r5 ; r5 is variable 'i' .define_ireg W r6 ; r6 is local WEIGHT .define_ireg CCA r7 ; r7 is curr cluster addr .define_ireg NUM r8 ; r8 is local copy of NUM .define_ireg DEN r9 ; r9 is local copy of DEN .define_ireg CC r10 ; r10 is curr cluster clr TEMP ; TEMP=0 ldq I, ISTOP ; I=ISTOP lda I, 1(I) ; I=ISTOP+1 ldq W, WEIGHT ; Load local copy of WEIGHT ldq CCA, CLUSTER_VECT ; CCA = &CLUSTER_VECT[0] mov $IA0, NUM ; Get local copy of NUM mov $IA1, DEN ; Get local copy of DEN .macro LOOP_BODY ; Unroll loop mulq TEMP, W, TEMP ; TEMP *= WEIGHT ldq CC, (CCA) ; CC = *CCA mulq NUM, CC, r22 ; r22 = CC * NUM addq TEMP, r22, TEMP ; TEMP += CC * NUM $call OTS$DIV_L, - ; r0=TEMP/DEN args= stq r0, (CCA) ; *CCA=TEMP/DEN mulq DEN, r0, r0 ; r0=DEN * (*CCA) subq TEMP, r0, TEMP ; TEMP -= DEN * (*CCA) lda CCA, BIGINT_BYTES(CCA); Advance to next cluster lda I, -1(I) ; Decrement loop index .endm LOOP_BODY UNROLL BODY=LOOP_BODY, FACTOR=8, TEST_FIRST=1, - CONTINUE = , - EXIT = $return .undefine_reg TEMP .undefine_reg I .undefine_reg W .undefine_reg CCA .undefine_reg NUM .undefine_reg DEN .undefine_reg CC $end_routine MULTIPLY_AND_DIVIDE ;++ ; Macro PROPAGATE_CARRY ; Abstract: Walk the CLUSTER_VECTOR array in reverse order propagating ; a carry out from one cluster to its predecessor as necessary. ; (A manually inlined routine.) ; Inputs: ; CA - register that holds &CLUSTER_VECT[0] ; WEIGHT ; CLUSTER_VECT[0..last] ; Outputs: ; CLUSTER_VECT[0..last] ;-- .macro PROPAGATE_CARRY .define_ireg CCA r0 ; r0 is addr of current cluster .define_ireg CC r1 ; r1 is current cluster .define_ireg CARRY r22 ; r22 is 'CARRY' variable .define_ireg W r23 ; r23 is local WEIGHT clr CARRY ; CARRY=0 ldq W, WEIGHT ; load local copy of WEIGHT ldq CCA, CLUSTER_VECT_SIZE ; CCA=sizeof(CLUSTER_VECT) lda CCA, -1(CCA) ; CCA=sizeof(CLUSTER_VECT)-1 ; CCA=(sizeof(CLUSTER_VECT)-1)*sizeof(CLUSTER_VECT[0]) sll CCA, #BIGINT_SHIFTER, CCA addq CA, CCA, CCA ; CCA --> last cluster .macro LOOP1_BODY ; Unroll loop 1 ldq CC, (CCA) ; r22 = *CCA addq CC, CARRY, CC ; r22 = *CCA + CARRY clr CARRY ; CARRY=0 .macro LOOP2_BODY ; Unroll loop 2 subq CC, W, CC ; CC -= WEIGHT lda CARRY, 1(CARRY) ; CARRY++ .endm LOOP2_BODY UNROLL BODY=LOOP2_BODY, FACTOR=8, TEST_FIRST=1, - COMPARE = , - CONTINUE = , - EXIT = stq CC, (CCA) ; Save current cluster lda CCA, -BIGINT_BYTES(CCA) ; Backup 1 cluster .endm LOOP1_BODY UNROLL BODY=LOOP1_BODY, FACTOR=2, TEST_FIRST=1, - COMPARE = , - CONTINUE = , - EXIT = .undefine_reg CCA .undefine_reg CC .undefine_reg CARRY .undefine_reg W .endm PROPAGATE_CARRY $routine COMPUTE, kind=stack, - saved_regs=, - local=true, - data_section_pointer=true, - size=$rsa_end+16 ;++ ; Abstract: Build up a binary representation of PI in the CLUSTER_VECT array. ; ; Inputs: ; PRECISION ; CLUSTER_SIZE ; IMAX ; LOG_4 ; CLUSTER_VECT[0] ; ; Outputs: ; CLUSTER_VECT[0..last] ; ; Notes: ; $ROUTINE defines the assembly-time variable $RSA_END as the offset ; from the beginning of the stack frame to just beyond the end of the ; the register save area. You can use this variable both to define ; the stack size with $ROUTINE's SIZE argument as above, and in ; referencing the stack beyond the register save area. In this case, ; we have allocated an additional 16 bytes of stack storage beyond the ; end of the register-save area. Note that the size of the stack frame ; must be an even multiple of 16. ;-- $linkage_section .define_ireg I r4 ; r4 is variable 'I' .define_ireg CS r5 ; r5 is local CLUSTER_SIZE .define_ireg TMP r6 ; r6 is variable 'TMP' .define_ireg CA r7 ; r7 is local &CLUSTER_VECT[0] .define_ireg PCC r8 ; r8 is variable 'PCC' .define_ireg I2M1 r9 ; r9 is 2*I-1 .define_ireg I4 r10 ; r10 is 4*I and 4*I+2 .define_freg I.G f2 ; f2 is variable 'I.G', G float ; shadow of I .define_freg L4 f3 ; f3 is local LOG_4 .define_freg ONE.G f4 ; f4 holds 1.0 in G floating $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect ldq I, IMAX ; I=IMAX ldt I.G, IMAX ; I.G shadows I cvtqg I.G, I.G ; Convert to G format ldg L4, LOG_4 ; Load local LOG_4 ldq CS, CLUSTER_SIZE ; Load local CLUSTER_SIZE ldq TMP, PRECISION ; TMP=PRECISION mov TMP, PCC ; PCC=PRECISION lda TMP, 2(TMP) ; TMP=PRECISION+2 addq CS, TMP, TMP ; TMP=CLUSTER_SIZE+PRECISION+2 ldq CA, CLUSTER_VECT ; CA = &CLUSTER_VECT[0] addq PCC, CS, PCC ; PCC=PRECISION+CLUSTER_SIZE lda PCC, -1(PCC) ; PCC=PRECISION+CLUSTER_SIZE-1 ; r0=(PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE $call OTS$DIV_L, args= ; PCC=(PRECISION+CLUSTER_SIZE-1)/CLUSTER_SIZE mov r0, PCC ldg ONE.G, G1 ; Load ONE.G with 1.0 .macro LOOP_BODY ?LABEL ; Unroll loop mulg I.G, L4, f0 ; f0=I*LOG_4 cvtgq/c f0, f0 ; Convert to integer ;++ ; We must store F0 to memory in order to load it into an ; integer register. Our invocation of the $ROUTINE macro ; above allocated an additional 16 bytes of stack storage, ; 8 of which we will use now for that purpose. We can ; reference the stack beyond the end of the register save ; area relative to the frame pointer, FP, using the ; $RSA_END assembly-time variable that is defined by the ; $ROUTINE macro. ;-- stt f0, $rsa_end(fp) ; Put to memory ldq $IA0, $rsa_end(fp) ; Read back to I reg ; IA0=(PRECISION-I*LOG_4+2+CLUSTER_SIZE) subq TMP, $IA0, $IA0 $call OTS$DIV_L, args=<$IA0/q, CS/q> cmple r0, PCC, r1 ; result <= PCC? bne r1, LABEL ; Yes, branch mov PCC, r0 ; No, use PCC LABEL: stq r0, ISTOP ; ISTOP=min(result,PCC) sll I, #1, I2M1 ; I2M1 = 2*I lda I2M1, -1(I2M1) ; I2M1 = 2*I-1 sll I, #2, I4 ; I4 = 4*I $call MULTIPLY_AND_DIVIDE, args=, - local=true lda I4, 2(I4) ; I4=4*I+2 $call MULTIPLY_AND_DIVIDE, args=, - local=true ldq r0, (CA) ; r0=CLUSTER_VECT[0] lda r0, 3(R0) ; r0=CLUSTER_VECT[0] + 3 stq r0, (CA) ; CLUSTER_VECT[0] += 3 lda I, -1(I) ; I-- subg I.G, ONE.G, I.G ; I.G shadows I .endm LOOP_BODY UNROLL BODY=LOOP_BODY, FACTOR=4, TEST_FIRST=1, - CONTINUE = , - EXIT = PROPAGATE_CARRY ; Inlined routine call $return .undefine_reg I .undefine_reg CS .undefine_reg TMP .undefine_reg CA .undefine_reg PCC .undefine_reg I2M1 .undefine_reg I4 .undefine_reg I.G .undefine_reg L4 .undefine_reg ONE.G $end_routine COMPUTE $routine PRINT_RESULT, kind=stack, - saved_regs=,- local=true, - data_section_pointer=true, - size=$rsa_end+16 ;++ ; Abstract: Print the results of the computation. ; Inputs: ; START - seconds at start of computation ; FINISH - seconds at end of computation ; FILE_VAR ; PRECISION ; WEIGHT ; CLUSTER_SIZE ; CLUSTER_VECT_SIZE ; CLUSTER_VECT[0..last] ; PRINT_WIDTH ;-- $linkage_section .define_freg SECONDS f2 ; f2 is variable 'SECONDS' .define_ireg MINUTES r4 ; r4 is variable 'MINUTES' .define_ireg HOURS r5 ; r5 is variable 'HOURS' .define_ireg I r6 ; r6 is variable 'I' .define_ireg CCA r7 ; r7 is variable 'CCA' .define_ireg CC r8 ; r8 is variable 'CC' .define_ireg CLIM r9 ; r9 is variable 'CLIM' .define_ireg J r10 ; r10 is variable 'J' .define_ireg CS r11 ; r11 is local CLUSTER_SIZE .define_ireg W r12 ; r12 is local WEIGHT .define_ireg COUNT r13 ; r13 is variable COUNT .define_ireg P r14 ; r14 is local PRECISION 10$: .asciz - " Calculation time : %-d hours %-d minutes %f seconds \x0A \x0A" 20$: .asciz "%-d." 30$: .asciz "%-d" 40$: .asciz "\x0A " 50$: .asciz "\x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect ldq r22, FINISH ; r22=FINISH ldq r23, START ; r23=START subq r22, r23, r23 ; r23=FINISH-START stq r23, $rsa_end(fp) ; Convert FINISH-START ldt f0, $rsa_end(fp) ; ...to... cvtqg f0, f0 ; ...G floating ldg f1, CLK_TCK ; f1=CLK_TCK divg f0, f1, SECONDS ; SECONDS=(FINISH-START)/CLK_TCK cvtgq/c SECONDS, f0 ; Convert... stt f0, $rsa_end(fp) ; ...to integer $call OTS$DIV_L, args=<$rsa_end(fp)/q, 60/a> ; r0=SECONDS/60 mov r0, MINUTES ; MINUTES=SECONDS/60 mulq r0, #60, r0 ; r0=MINUTES*60 stq r0, $rsa_end(fp) ; Convert... ldt f0, $rsa_end(fp) ; ...to... cvtqg f0, f0 ; G floating subg SECONDS, f0, SECONDS ; SECONDS -= MINUTES*60 $call OTS$DIV_L, - ; r0=MINUTES/60 args= mov r0, HOURS ; HOURS=MINUTES/60 mulq HOURS, #60, r0 ; r0=HOURS*60 subq MINUTES, r0, MINUTES ; MINUTES -= HOURS*60 $call DECC$GFPRINTF, - args= ldq CCA, CLUSTER_VECT ; CCA = &CLUSTER_VECT[0] ldq CLIM, CLUSTER_VECT_SIZE ; CLIM=sizeof(CLUSTER_VECT) ; CLIM=sizeof(CLUSTER_VECT)*sizeof(CLUSTER_VECT[0]) sll CLIM, #BIGINT_SHIFTER, CLIM addq CCA, CLIM, CLIM ; CLIM --> beyond last cluster ldq CC, (CCA) ; CC = CLUSTER_VECT[0] $call DECC$GFPRINTF, - args= mov 1, I ; I=1 ldq CS, CLUSTER_SIZE ; Load local cluster size lda CCA, BIGINT_BYTES(CCA) ; Advance to next cluster cmplt CCA, CLIM, r0 ; Beyond last cluster? beq r0, 400$ ; Yes, branch to exit loop 1 ldq W, WEIGHT ; Load local copy of WEIGHT ldq P, PRECISION ; Load local copy of PRECISION clr COUNT ; COUNT=0 100$: ldq CC, (CCA) ; CC = *CCA mov 1, J ; J=1 cmple J, CS, r1 ; J<=CLUSTER_SIZE? beq r1, 300$ ; No, branch to exit loop 2 200$: mulq CC, #10, CC ; CC *= 10 $call OTS$DIV_L, args= ; r0=CC/WEIGHT mov r0, $IA2 ; IA2=CC/WEIGHT mulq W, r0, r0 ; r0=IA2*WEIGHT subq CC, r0, CC ; CC -= IA2*WEIGHT cmple COUNT, P, r1 ; COUNT<=PRECISION? beq r1, 210$ ; No, branch sextl $IA2, $IA2 ; Convert to longword $call DECC$GFPRINTF, - args= 210$: lda COUNT, 1(COUNT) ; COUNT++ cmplt I, #PRINT_WIDTH, r0 ; I>=PRINT_WIDTH? bne r0, 220$ ; No, branch clr I ; I=0 $call DECC$GFPRINTF, - ; Print linefeed + 2 spaces args= 220$: lda I, 1(I) ; I++ lda J, 1(J) ; J++ cmple J, CS, r0 ; J<=CLUSTER_SIZE? bne r0, 200$ ; Yes, branch to continue loop 2 300$: lda CCA, BIGINT_BYTES(CCA) ; Advance to next cluster cmplt CCA, CLIM, r0 ; Beyond last cluster? bne r0, 100$ ; No, branch to continue loop 1 $call DECC$GFPRINTF, - ; Print linefeed args= 400$: $return .undefine_reg SECONDS .undefine_reg MINUTES .undefine_reg HOURS .undefine_reg I .undefine_reg CCA .undefine_reg CC .undefine_reg CLIM .undefine_reg J .undefine_reg CS .undefine_reg W .undefine_reg COUNT .undefine_reg P $end_routine PRINT_RESULT $routine CLEANUP, kind=stack, - local=true, - data_section_pointer=true ;++ ; Abstract: Perform shutdown trivia. ; Inputs: FILE_VAR ; Outputs: The output file is closed. ;-- .base r27, $ls ; Access linkage section ldq r22, $dp ; Access... .base r22, $ds ; ...data section $call DECC$FCLOSE, args=FILE_VAR/q $return $end_routine CLEANUP ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; EXTERNAL ROUTINES ;--------------------------------------------------------------------------- $routine PI, kind=stack, saved_regs=, - data_section_pointer=true, local=false ;++ ; Abstract: Program entry point. ; Outputs: ; PI.DAT with the specified number of decimal places. ;-- $linkage_section COPYRIGHT_MSG: .asciz - ; \xA9 is '©', \x0A is , or '\n' in C parlance "\xA9 Digital Equipment Corporation 1992, 1993. All rights reserved.\x0A" $code_section .base r27, $ls ; 1st access link sect w/r27 mov r27, r2 ; Later use r2 for link sect ldq r3, $dp ; Access... .base r3, $ds ; ...data section .base r2, $ls ; Now use r2 for link sect $call DECC$GPRINTF, - ; Display copyright notice. args=COPYRIGHT_MSG/A $call ASK_NUMBER_OF_DIGITS, local=true bgt r0, 10$ ; Branch if positive precision mov 1, r0 ; Return success status $return 10$: $call INITIALIZE, local=true $call DECC$CLOCK stq r0, START $call COMPUTE, local=true $call DECC$CLOCK stq r0, FINISH $call PRINT_RESULT, local=true $call CLEANUP, local=true mov 1, r0 ; Return success status $return $end_routine PI .end PI