;; Copyright © 2001,2002 Marko Mäkelä (msmakela@nic.funet.fi)
;;
;;     This program is free software; you can redistribute it and/or modify
;;     it under the terms of the GNU General Public License as published by
;;     the Free Software Foundation; either version 2 of the License, or
;;     (at your option) any later version.
;;
;;     This program is distributed in the hope that it will be useful,
;;     but WITHOUT ANY WARRANTY; without even the implied warranty of
;;     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;     GNU General Public License for more details.
;;
;;     You should have received a copy of the GNU General Public License
;;     along with this program; if not, write to the Free Software
;;     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

;;; This program code can be compiled with the GNU assembler (see last lines).

;;; There exist several versions of the C2N232.  Their main difference
;;; is the circuit board layout and the wiring of the RTS and CTS signals:
;;; 
;;; CTS RTS sym description
;;; --- --- --- -----------
;;; PB7 PB4 DIL First board with solder-through components (dil.brd)
;;; PD5 PD4 SMD First surface mounted boards (smd.brd, smdl.brd)
;;; PB6 PB7     Simplified surface mounted board (smdcr.brd)
;;;
;;; The firmware ignores the RTS signal.

	;; By default, the code is assembled for the newest board (smdcr.brd).
;	.equ DIL, 1		; for dil.brd
;	.equ SMD, 1		; for smd.brd or smdl.brd

;;; Revision history:
;;; 1	Initial release (reformatting of c2n232.asm, added DIL and SMD)
;;; 2	Added support for asymmetric pulses
;;; 3	Added raw pulse stream playback (7)
;;;	LOAD operation (3) will now flush the buffer when receiving NUL
;;; 4	Added packed LOAD operation (8)
	;; Firmware revision to assemble.  The ".if REVISION" tests further down
	;; leave out the features that the revision history lists for newer
	;; revisions when a smaller number is given here.
	.equ REVISION, 4	; adjust this to downgrade

;;; Modes of operation:
;;; Idle mode (0)
;;; - entered upon RESET
;;; - entered upon receiving BREAK or NUL or an unknown command character
;;; - the communication buffers are cleared
;;; - the cassette lines READ and SENSE are set to the HIGH state ('1')
;;; Pulse width measurement mode (1)
;;; - entered upon receiving ctrl-a
;;; - the controller will clock the high-to-low transitions on WRITE
;;;   and report them over RS-232 as 8-bit unsigned values,
;;;   the unit being 8 microseconds
;;; - there is no flow control; the RS-232 device must be fast enough
;;; SAVE operation (2)
;;; - entered upon receiving ctrl-b SHORT MEDIUM LONG
;;; - three parameters: maximum pulse widths; unit=8 microseconds
;;; - the pulses are converted to characters on the RS-232 line:
;;;   - A=pause (pulse width > LONG)
;;;   - B=short (pulse width <= SHORT)
;;;   - C=medium (SHORT < pulse width <= MEDIUM)
;;;   - D=long (MEDIUM < pulse width <= LONG)
;;; - there is no flow control; the RS-232 device must be fast enough
;;; LOAD operation (3)
;;; - entered upon receiving ctrl-c PAUSE SHORT MEDIUM LONG
;;; - four parameters: half-pulse widths; unit=8 microseconds
;;; - received characters A..D from RS-232 indicate pulses with 50% duty cycle:
;;;   - A=00=pause (pulse width = 2 * PAUSE)
;;;   - B=01=short (pulse width = 2 * SHORT)
;;;   - C=10=medium (pulse width = 2 * MEDIUM)
;;;   - D=11=long (pulse width = 2 * LONG)
;;; - the characters E..P specify asymmetric pulses
;;;   - these are treated as 4-bit numbers, e.g. 'J'-'A' = 9 = 1001 in binary
;;;   - the two least significant bits (01=short for 'J') specify the width
;;;     of the LOW level
;;;   - the two most significant bits (10=medium for 'J') specify the width
;;;     of the HIGH level
;;; - XON/XOFF flow control is used during input
;;; - NUL will enter idle mode once the data has been relayed
;;; send operation (4)
;;; - entered upon receiving ctrl-d LENGTH
;;; - one 16-bit parameter (little-endian): number of data bytes - 1
;;; - after transceiving LENGTH+1 characters, an acknowledgement '@' is sent
;;;   and the controller enters idle mode
;;; - XON/XOFF flow control is used during input
;;; - the only way to abort this command is to send the BREAK signal
;;; receive operation (5)
;;; - entered upon receiving ctrl-e LENGTH
;;; - one 16-bit parameter (little-endian): number of data bytes - 1
;;; - after transceiving LENGTH+1 characters, the controller enters idle mode
;;; - there is no flow control; the RS-232 device must be fast enough
;;; receive operation with calibration (6)
;;; - entered upon receiving ctrl-f LENGTH
;;; - one 16-bit parameter (little-endian): number of data bytes - 1
;;; - the first byte received from READ must be 0xf0; this will calibrate
;;;   the expected pulse widths
;;; - after transceiving LENGTH+1 characters, the controller enters idle mode
;;; - there is no flow control; the RS-232 device must be fast enough
;;; TAP operation (7)
;;; - entered upon receiving ctrl-g
;;; - no parameters
;;; - received characters from RS-232 indicate pulses with 50% duty cycle:
;;;   - n=1..255 = n * 8 microseconds
;;;   - NUL will enter idle mode once the data has been relayed
;;; - XON/XOFF flow control is used during input
;;; packed half-pulse LOAD operation (8)
;;; - entered upon receiving ctrl-h WIDTH00 WIDTH01 WIDTH10 WIDTH11
;;; - four parameters: half-pulse widths; unit=8 microseconds
;;; - each character received from RS-232 contains four half-pulses
;;; - the least significant bit pairs of the character are sent first
;;; - XON/XOFF flow control is used during input
;;; - at the end of stream, '0' is sent and idle mode is entered automatically

	;; Set up I/O port D.  Only the first surface mounted boards have CTS on
	;; port D (bit 5); there it is written as 1 to both PORTD and DDRD.
	;; On all other boards DDRD is simply cleared.  Clobbers register 16.
.macro ioinitd
	.ifndef SMD		; dil.brd and smdcr.brd
	clr 16
	.else			; smd.brd, smdl.brd (surface mounted)
	ldi 16, 1<<5		; CTS=high
	out PORTD, 16
	.endif
	out DDRD, 16
.endm

	;; Set up I/O port B.  The same bit mask is written to PORTB and DDRB:
	;; bit 0 (SENSE) and bit 3 (READ) on every board, plus the CTS bit on
	;; the boards that have CTS on port B.  Clobbers register 16.
.macro ioinitb
	.ifdef DIL		; dil.brd (solder-through): CTS is bit 7
	ldi 16, (1<<7)|(1<<3)|(1<<0)
	.else
	.ifndef SMD		; smdcr.brd (cost reduced): CTS is bit 6
	ldi 16, (1<<6)|(1<<3)|(1<<0)
	.else			; smd.brd, smdl.brd: CTS is on port D
	ldi 16, (1<<3)|(1<<0)
	.endif
	.endif
	out PORTB, 16		; SENSE=high; READ=high; CTS=high (stop rx)
	out DDRB, 16
.endm
	;; Apply a single-bit I/O instruction (sbi, cbi, sbis, ...) given in
	;; \op to the CTS output bit of the board being assembled for.
.macro cts op
	.ifdef DIL		; dil.brd (solder-through)
	\op PORTB, 7
	.else
	.ifndef SMD		; smdcr.brd (surface mounted, cost reduced)
	\op PORTB, 6
	.else			; smd.brd, smdl.brd (surface mounted)
	\op PORTD, 5
	.endif
	.endif
.endm

	;; I/O register addresses, for use as operands of in/out and of the
	;; single-bit instructions sbi/cbi/sbis/sbic.
	;; NOTE(review): the values look like the AT90S2313 I/O map -- confirm
	;; against the data sheet of the device actually fitted.
	.equ SREG, 0x3F		; Status Register
	.equ SPL, 0x3D		; Stack Pointer Low
	.equ GIMSK, 0x3B	; General Interrupt MaSK register
	.equ GIFR, 0x3A		; General Interrupt Flag Register
	.equ TIMSK, 0x39	; Timer/Counter Interrupt MaSK register
	.equ TIFR, 0x38		; Timer/Counter Interrupt Flag register
	.equ MCUCR, 0x35	; MCU general Control Register
	.equ TCCR0, 0x33	; Timer/Counter 0 Control Register
	.equ TCNT0, 0x32	; Timer/Counter 0 (8-bit)
	.equ TCCR1A, 0x2F	; Timer/Counter 1 Control Register A
	.equ TCCR1B, 0x2E	; Timer/Counter 1 Control Register B
	.equ TCNT1H, 0x2D	; Timer/Counter 1 High Byte
	.equ TCNT1L, 0x2C	; Timer/Counter 1 Low Byte
	.equ OCR1AH, 0x2B	; Output Compare Register 1 High Byte
	.equ OCR1AL, 0x2A	; Output Compare Register 1 Low Byte
	.equ ICR1H, 0x25	; T/C 1 Input Capture Register High Byte
	.equ ICR1L, 0x24	; T/C 1 Input Capture Register Low Byte
	.equ WDTCR, 0x21	; Watchdog Timer Control Register
	.equ EEAR, 0x1E		; EEPROM Address Register
	.equ EEDR, 0x1D		; EEPROM Data Register
	.equ EECR, 0x1C		; EEPROM Control Register
	.equ PORTB, 0x18	; Data Register, Port B
	.equ DDRB, 0x17		; Data Direction Register, Port B
	.equ PINB, 0x16		; Input Pins, Port B
	.equ PORTD, 0x12	; Data Register, Port D
	.equ DDRD, 0x11		; Data Direction Register, Port D
	.equ PIND, 0x10		; Input Pins, Port D
	.equ UDR, 0x0C		; UART I/O Data Register
	.equ USR, 0x0B		; UART Status Register
	.equ UCR, 0x0A		; UART Control Register
	.equ UBRR, 0x09		; UART Baud Rate Register
	.equ ACSR, 0x08		; Analog Comparator Control and Status Register

	;; value written to UBRR by the reset handler
	.equ BAUDRATE, 12	; 38400 bps at 8 MHz

	;; Register allocation.  All symbols below are register numbers.
	;; Several of them share a register: PFLO/PARAM2 (2), PFHI/PWIDTH (4),
	;; PCMP/PTAP (7) and PSMAX/PSCNT (8).

	;; Command parameters.  The rxc handler stores received parameter
	;; octets through Z, starting from ZL=0, so that they end up here.
	.equiv PARAM0, 0	; first parameter
	.equiv PARAM1, 1	; second parameter
	.equiv PARAM2, 2	; third parameter
	.equiv PARAM3, 3	; fourth parameter

	.equiv PFLO, 2		; recv: least significant byte of pulse fall
	.equiv PFHI, 4		; recv: most significant byte of pulse fall
	.equiv PMIN, 5		; minimum pulse width (recv and send)
	.equiv PDIFF, 6		; maximum-minimum (recv and send)
	.equiv PCMP, 7		; recv: binary search variable
	.equiv PSMAX, 8		; recv: binary search maximum
	.equiv PSCNT, 8		; send: bit count
	.equiv PDATA, 9		; recv/send: current data octet

	.equiv PWIDTH, 4	; load: pulse width being output
	.equiv PTAP, 7		; bit 0=end of stream
				; bits 2,1=00 (load)
				;          01 (TAP playback)
				;          1x (packed load)
				; x=nibble counter (0=low, 1=high)

	.equiv MAXBUF, 14	; upper limit for receive buffer
	.equiv NPARAM, 15	; number of octets to receive from UART
				; (bit 7 set: custom send mode is active)

	;; 16 and ZH are reserved for interrupts
	;; ZL is for receiving UART communications

	.equiv YL, 28		; transmit pointer
	.equiv YH, 29		; constantly zero
	.equiv ZL, 30		; receive buffer pointer
	.equiv ZH, 31		; temporary register used in interrupts

	;; RAM map
	;; 124-byte circular receive buffer, followed by a 4-byte stack
rxbuf	= 0x60
rxbufe	= rxbuf + 124
stack	= rxbufe
stacke	= stack + 4
	;; Interrupt vector table, one rjmp per vector.  Only ICP1, OC1 and RXC
	;; have handlers of their own; every other interrupt ends up in the
	;; RESET sequence via "error".  The last vector needs no rjmp because
	;; "error" immediately follows the table.
	.text
	rjmp reset
	rjmp error		; INT0 (external interrupt 0, serial ATN)
	rjmp error		; INT1 (external interrupt 1, serial CLK)
	rjmp icp1		; ICP1 (input capture 1, cassette WRITE)
	rjmp oc1		; OC1 (output compare 1, cassette READ)
	rjmp error		; OVF1 (timer 1 overflow)
	rjmp error		; OVF0 (timer 0 overflow)
	rjmp rxc		; RXC (UART receive complete)
	rjmp error		; DRE (UART data register empty)
	rjmp error		; TXC (UART transmit complete)
;	rjmp error		; ACI (analog comparator) [fall through]

	;; error: go to the RESET sequence
error:
	;; RESET handler
	;; Sets the stack pointer to the top of the stack area, initializes
	;; the I/O ports, the analog comparator, the UART baud rate and the
	;; sleep mode, clears the state registers and falls through to idle.
reset:	cli
	ldi 16, stacke-1
	out SPL, 16
	;; I/O port initialization
	ioinitd
	ioinitb
	;; disable the analog comparator
	ldi 16, 0x80
	out ACSR, 16
	;; initialize the UART
	ldi 16, BAUDRATE
	out UBRR, 16
	;; enable idle mode sleep
	ldi 16, 0x20
	out MCUCR, 16
	clr ZL			; ZL below rxbuf: rxc expects commands/parameters
	clr YH			; YH is used as a constant zero everywhere
	clr PMIN		; no calibrated pulse widths
	clr PDIFF
	clr NPARAM		; no data to receive
	;; MAXBUF = 3/4 of the buffer size; rxc_enqueue compares the number of
	;; free buffer slots with it to decide when to throttle the host
	ldi 16, (rxbufe-rxbuf)*3/4
  	mov MAXBUF, 16		; safety margin for RX buffer overflow

	;; idle mode
	;; "idle" disables the timer interrupts and raises SENSE.
	;; "idle_common" is the shared tail of every mode set-up routine: it
	;; enables the UART, tells the host that it may send (DC1 and CTS low)
	;; unless receiving was already enabled, enables interrupts and sleeps
	;; forever.  All further work is done in the interrupt handlers.
idle:	out TIMSK, YH		; disable ICP and OC interrupts
	sbi PORTB, 0		; SENSE=high
idle_common:
	ldi 16, 0x98
	out UCR, 16		; enable RX and TX; enable RXC interrupts
	cts sbis		; CTS high means that receiving is stopped
	rjmp idle_dc1_done	; jump if receiving was previously enabled
	ldi 16, 0x11		; DC1, ctrl-q, permission to send
	sbis USR, 5
	rjmp .-4		; wait for UART Data Register Empty (DRE)
	out UDR, 16
	cts cbi			; drop CTS (start receiving)
idle_dc1_done:
	sei			; enable interrupts
	;; idle loop
	sleep
	rjmp .-4		; back to the sleep instruction

	;; pulse width measurement mode
	;; Entered from the command dispatcher for command 1.  Sets the output
	;; compare value to 0x00ff, lowers SENSE, marks the mode for the icp1
	;; handler by clearing PARAM0, and enables the ICP and OC interrupts.
	;; The measured widths are reported by icp1nosend/icp1pw.
pulse:	ser 16
	out OCR1AH, YH		; set the output compare value
	out OCR1AL, 16		; (16 bits, 0x00ff)
	cbi PORTB, 0		; SENSE=low
	clr PARAM0		; select pulse width measurement mode for ICP
	ldi 16, 0x48
	out TIFR, 16		; clear pending ICP and OC interrupts
	out TIMSK, 16		; enable ICP and OC interrupts
	ldi 16, 0xcb		; activate ICP on rising edge, CK/64
	out TCCR1B, 16
	rjmp idle_common

	;; Sleep until the rxc handler has counted NPARAM down to zero, i.e.
	;; until every parameter octet of the current command has arrived.
	;; Interrupts are enabled only around the sleep instruction; the macro
	;; leaves with interrupts disabled.
.macro waitparams
1:	sei			; enable interrupts
	sleep			; wait for interrupt
	cli			; disable interrupts
	tst NPARAM
	brne 1b			; parameters still missing: sleep again
.endm

	;; save data
	;; Entered for command 2 with NPARAM=3.  After the three thresholds
	;; have arrived in PARAM0..PARAM2, the output compare value is set to
	;; the third one (LONG), SENSE is lowered and the ICP and OC
	;; interrupts are enabled.  icp1nosend classifies the pulses; PARAM0
	;; being nonzero is what distinguishes this mode from "pulse" there.
save:	waitparams		; enable interrupts and wait for the parameters
	out OCR1AH, YH		; set the output compare value
	out OCR1AL, PARAM2	; long pulse width
	cbi PORTB, 0		; SENSE=low
	ldi 16, 0x48
	out TIFR, 16		; clear pending ICP and OC interrupts
	out TIMSK, 16		; enable ICP and OC interrupts
	ldi 16, 0xcb		; activate ICP on rising edge, CK/64
	out TCCR1B, 16
	rjmp idle_common	; go to idle loop

	;; load data
	;; "load" (commands 3 and 8) first waits for the four pulse width
	;; parameters; "tap" (command 7) has none and enters below.
	;; Both pointers are set to the start of the receive buffer (empty
	;; buffer); ZL >= rxbuf is also what makes rxc treat further input as
	;; pulse data.  Only the OC interrupt is enabled here; the timer
	;; itself is started by rxc_pulse when the first data octet arrives.
load:	waitparams		; enable interrupts and wait for the parameters
tap:	out OCR1AH, YH		; set the output compare value
	out OCR1AL, YH		; (16 bits)
	ldi ZL, rxbuf		; load the buffer pointers
	mov YL, ZL
	cbi PORTB, 0		; SENSE=low
	ldi 16, 0x40
	out TIFR, 16		; clear pending OC interrupts
	out TIMSK, 16		; enable OC interrupts
	rjmp idle_common	; go to idle loop

	;; calibrate the receiver (read an 0xf0 byte and then actual data)
	;; Clearing PMIN and PDIFF makes icp1recvhi treat the first two
	;; measured pulses as the minimum and maximum pulse widths.
recvcal:
	clr PMIN
	clr PDIFF
	;; fall through
	;; receive data from the cassette interface
	;; Waits for the 16-bit length in PARAM0/PARAM1, enables the ICP
	;; interrupt on the falling edge and lowers READ to request the first
	;; transfer.  The data is decoded by icp1recv.
recv:	waitparams		; enable interrupts and wait for the parameters
	ser 16
	out OCR1AH, 16		; set the output compare value
	out OCR1AL, 16		; (16 bits, 0xffff)
	ldi 16, 0x08
	out TIFR, 16		; clear pending ICP interrupts
	out TIMSK, 16		; enable ICP interrupts
	ldi 16, 0x09		; activate ICP on falling edge, CK/1
	out TCCR1B, 16
	sei			; enable interrupts
	cbi PORTB, 3		; drop READ to initiate transfer
	rjmp idle_common

	;; send data to the cassette interface
	;; Waits for the 16-bit length in PARAM0/PARAM1.  NPARAM is zero after
	;; waitparams, so complementing it gives 0xff: bit 7 tells rxc that
	;; the custom send mode is active, and the copy in PSCNT is the
	;; "negative bit count" that makes icp1 fetch a new byte.
	;; The ICP interrupt is not enabled here; rxc enables it when the
	;; first data octet has been queued.
send:	waitparams
	ldi 16, 0x02		; activate ICP on falling edge, no CTC1, CK/8
	out TCCR1B, 16
	com NPARAM		; set NPARAM:7, a flag for rxc interrupt
	mov PSCNT, NPARAM	; negative bit count (start sending a new byte)
	ldi ZL, rxbuf		; load the buffer pointers
	mov YL, ZL
	rjmp idle_common

	;; UART receive complete
	;; Interrupt handler for received characters.  Uses 16 and ZH as
	;; scratch registers.  When NPARAM.7 is clear, the character is a
	;; command, a parameter or load data and is handled at rxc_nosend.
	;; Otherwise the custom send mode is active: the character is queued
	;; for icp1, and the sender is restarted if it had run out of data
	;; (icp1send0 clears TIMSK in that case).
rxc:	sbrs NPARAM, 7
	rjmp rxc_nosend
	;; the custom send mode is active
	cbi UCR, 7		; disable RXC interrupts
	sbrs PSCNT, 7		; skip "sei" if no icp1 transfer is pending
	sei			; re-enable interrupts to reduce icp1 latency
  	in 16, UDR		; read the character
	in ZH, USR
	sbrc ZH, 4
	rjmp rxc_error		; framing error
	sbrc ZH, 3
	rjmp rxc_error		; overrun
	cli
	in ZH, TIMSK		; see if the sender has been stopped
	cpse ZH, YH		; TIMSK == 0 means stopped
	rjmp rxc_enqueue_sei	; sender is going
	;; the sender has been stopped: restart it
	;; (rxc_enqueue_sei ends in a reti, which returns here with
	;; interrupts enabled; hence the cli)
	rcall rxc_enqueue_sei	; enqueue the character
	cli
	ldi ZH, 0x08
	out TIFR, ZH		; clear pending ICP interrupts
	out TIMSK, ZH		; enable ICP interrupts
	sbic PIND, 6
	reti			; return if cassette write=high
	rjmp icp1send0		; otherwise start a new transfer

	;; Append the character in register 16 to the circular receive buffer.
	;; rxc_enqueue_sei enables interrupts first; rxc_enqueue does not.
	;; ZH is set to the number of free buffer slots,
	;; (YL - ZL - 1) modulo the buffer size.  Zero free slots is treated
	;; as a communication error.  When the count equals MAXBUF, the host
	;; is asked to pause (DC3 and CTS high).  The character is stored by
	;; rxc_st, which also ends the interrupt.
rxc_enqueue_sei:
	sei
	;; enqueue a character
rxc_enqueue:
	mov ZH, YL		; dequeuing pointer
	sub ZH, ZL		; enqueuing pointer
	dec ZH
	sbrc ZH, 7
	subi ZH, rxbuf-rxbufe	; reduce the difference modulo the buffer
	cpse ZH, YH
	rjmp rxc_novf
	rjmp rxc_error		; buffer overflow
rxc_novf:
	cpse ZH, MAXBUF
	rjmp rxc_st		; free space not at the MAXBUF mark; enqueue directly
	ldi ZH, 0x13		; DC3, ctrl-s, request to stop sending
	;; NOTE(review): UDR is written without waiting for DRE here
	out UDR, ZH
	cts sbi			; raise CTS (stop receiving)
	rjmp rxc_st		; enqueue the data

	;; not a custom send character
	;; Reads the character and restarts in idle mode (rxc_error) on an
	;; overrun or framing error.  ZL >= rxbuf means that a load operation
	;; is active and the character is pulse data; otherwise it is a
	;; parameter when NPARAM is nonzero, and a command when it is zero.
rxc_nosend:
  	in 16, UDR
	in ZH, USR
	andi ZH, 0x18		; check for overrun or framing error
	brne rxc_error
	cpi ZL, rxbuf
	brsh rxc_pulse
	;; received a command or a parameter
	cpse NPARAM, YH	; number of bytes to receive
	rjmp rxc_param
	rjmp rxc_cmd		; no data to receive -> get a command
rxc_param:
	dec NPARAM
	;; Store register 16 through Z, advance ZL, re-enable the RXC
	;; interrupt and return from the interrupt.  ZL is wrapped back to
	;; rxbuf when it reaches rxbufe.  ZH is left holding rxbufe.
rxc_st:	clr ZH			; store the character and exit from interrupt
	st Z+, 16
	ldi ZH, rxbufe
	ldi 16, 0x98
	cli			; disable interrupts
	out UCR, 16		; enable RXC interrupts
	cpse ZL, ZH		; wrap the buffer from top to bottom if needed
	reti
	ldi ZL, rxbuf
	reti

	;; data byte for the cassette load emulation
	;; Entered from rxc_nosend with the received character in register 16
	;; while a load operation is active (ZL >= rxbuf).
	;; - packed load (PTAP.2 set): the octet is queued unchanged
	;; - NUL (load and TAP playback): marks the end of the stream in
	;;   PTAP.0; any data received after that is an error
	;; - TAP playback (PTAP.1 set): the octet is queued unchanged
	;; - load: 'A'..'P' is converted to a 4-bit code (bits 0..1 and
	;;   bits 2..3 each select one of PARAM0..PARAM3); for 'A'..'D' both
	;;   bit pairs are made equal.  Anything else is an error.
	;; If the queue was empty and timer 1 is stopped, the timer is started
	;; with PARAM0 as the first compare value before the octet is queued.
rxc_pulse:
	.if REVISION >= 3
	.if REVISION >= 4
	sbrc PTAP, 2
	rjmp rxc_noconv		; PTAP.2 set: packed load operation
	.endif
	sbrc PTAP, 0
	rjmp rxc_error		; end of transfer: accept no more data
	cpse 16, YH		; NUL: enter idle mode at end of stream
	rjmp rxc_noend
	inc PTAP		; set bit 0 of PTAP to signal end of stream
	cpse YL, ZL
	reti			; buffer not yet empty: return
	;; The buffer is empty, but the pulse that was dequeued last may
	;; still be playing.  Going idle now would stop the timer and raise
	;; READ in the middle of that pulse.  If the timer is running, leave
	;; the end of stream handling to oc1_dequeue, which finds PTAP.0 set
	;; once the pulse has been completed.
	in ZH, TCCR1B
	andi ZH, 7		; clock select bits: zero when the timer is stopped
	cpse ZH, YH
	reti			; timer running: oc1 will go idle after the pulse
	clr 16
	rjmp rxc_cmdok		; nothing is being played: send '0' and go idle
rxc_noend:
	sbrc PTAP, 1
	rjmp rxc_noconv		; PTAP.1 set: TAP playback (no conversion)
	.endif
	subi 16, 0x41
	brlo rxc_error		; not a pulse character (smaller than 'A')
	.if REVISION >= 2
	cpi 16, 0x10
	brsh rxc_error		; not a pulse character (greater than 'P')
	cpi 16, 0x04
	brsh rxc_noconv
	;; compatibility mode ('A'..'D')
	mov YH, 16		; copy bits 0..1 to bits 2..3
	lsl YH
	lsl YH
	or 16, YH
	.else
	cpi 16, 0x04
	brsh rxc_error		; not a pulse character (greater than 'D')
	;; look up the pulse width from PARAM0..PARAM3 right away
	mov YH, ZL
	mov ZL, 16
	ld 16, Z
	mov ZL, YH
	.endif
	clr YH			; YH must be zero again
rxc_noconv:
	cpse YL, ZL
	rjmp rxc_enqueue
	;; the buffer was empty
	in ZH, TCCR1B
	andi ZH, 7
	brne rxc_enqueue	; the timer was running, do not start it
	;; start the timer for pulse stream output
	ldi ZH, 0x80
	out TCCR1A, ZH		; clear OC1 on compare match
	ldi ZH, 0x0b
	out TCCR1B, ZH		; count pulses of CK/64, clear on compare match
	clr ZH
	out OCR1AH, ZH
	out OCR1AL, PARAM0	; time base for pause
	rjmp rxc_enqueue

	;; process a command
	;; The command character is in register 16.  Valid command numbers
	;; continue at rxc_cmdok; anything else is handled as a communication
	;; error, which sends a NUL and then behaves like command 0.
rxc_cmd:
	.if REVISION < 3
	cpi 16, 7		; number of commands
	.else
	.if REVISION < 4
	cpi 16, 8		; number of commands
	.else
	cpi 16, 9		; number of commands
	.endif
	.endif
	brlo rxc_cmdok
rxc_error:			; communication error: send a NUL character
	cli			; disable interrupts
	clr 16
	out UDR, 16		; send a NUL character
	clr PMIN		; clear the calibrated pulse widths
	clr PDIFF
	;; fall through with command = 0 (idle mode)
	;; rxc_cmdok: start the command whose number is in register 16.
	;; Also jumped to with 16=0 by the load code at the end of a stream.
	;; Any operation in progress is abandoned: READ is raised, the timers
	;; are stopped and the stack pointer is reset, so this code never
	;; returns to the interrupted context.
rxc_cmdok:
	sbi PORTB, 3		; raise READ
	clr PDATA
	clr YH
	out TIMSK, YH		; disable timer interrupts
	out TCCR0, YH		; timer 0 stopped
	out TCCR1A, YH		; timer 1 disconnected from OC1 output
	out TCCR1B, YH		; timer 1 stopped
	out TCNT1H, YH		; clear timer 1 counter
	out TCNT1L, YH		; (16 bits)
	ldi ZH, stacke-1
	out SPL, ZH		; restore the stack pointer

	;; echo the command and branch according to it
	;; (the echo is the command number as an ASCII character, '0'...)
	mov ZH, 16
	ori ZH, 0x30
	out UDR, ZH

	clr ZL			; receive the parameters to the registers
	.if REVISION >= 4
	cpi 16, 8
	brsh cmd8_15
	.endif
	cpi 16, 4
	brsh cmd4_7
	;; 0 to 3
	cpi 16, 2
	brsh cmd2_3
	;; 0 or 1
	clr NPARAM		; no parameters
	sbrs 16, 0
	rjmp idle		; command 0
	rjmp pulse		; command 1
cmd2_3:	;; 2 or 3
	ldi ZH, 3
	mov NPARAM, ZH	; 3 parameters for save
	sbrs 16, 0
	rjmp save		; command 2
	inc NPARAM		; 4 parameters for load
	.if REVISION >= 3
	clr PTAP		; PTAP.0=0 (no EOF), PTAP.1=0 (no TAP)
	.endif
	rjmp load		; command 3
cmd4_7:	;; 4 to 7
	ldi ZH, 2
	mov NPARAM, ZH		; 2 parameters
	cpi 16, 6
	brsh cmd6_7
	;; 4 or 5
	;; NOTE(review): SENSE is raised for commands 4 and 5 only, not for
	;; command 6, which goes on to recv as well -- confirm that is intended
	sbi PORTB, 0		; SENSE=high
	sbrs 16, 0
	rjmp send		; command 4
	rjmp recv		; command 5
	;; for revision 4 and later, cmd6_7 is defined after icp1recvhi
	.if REVISION < 4
cmd6_7:	;; 6 or 7
	.if REVISION >= 3
	sbrs 16, 0
	.endif
	rjmp recvcal		; command 6
	.if REVISION >= 3
	;; command 7
	ldi 16, 2		; TAP operation
	mov PTAP, 16		; PTAP.0=0 (no EOF), PTAP.1=1 (TAP playback)
	clr PARAM0		; pause pulse width
	clr NPARAM		; no parameters for command 7
	rjmp tap
	.endif
	.endif

	;; input capture for custom receive function
	;; Entered from icp1nosend with PARAM3 holding the value of TCCR1B.
	;; Bit 6 of it tells which edge was captured.  On a falling edge the
	;; capture time is stored in PFHI:PFLO, READ is raised and the capture
	;; is switched to the rising edge.  On a rising edge the pulse width
	;; is computed (icp1recvhi).
icp1recv:
	sbrc PARAM3, 6
	rjmp icp1recvhi
	in PFLO, ICR1L		; got a falling edge: sample the time
	in PFHI, ICR1H
	sbi PORTB, 3		; raise READ
	ldi 16, 0x49
	out TCCR1B, 16		; activate ICP on rising edge, CK/1
	reti
	;; got the maximum pulse width in PARAM3
	;; Calibration, second pulse: PDIFF = maximum - minimum + 1.
	;; It is an error if the maximum is below the minimum or if the
	;; difference is less than 16.
icp1recvmax:
	sub PARAM3, PMIN
	brlo rxc_error
	mov ZL, PARAM3		; ZL is always 0 in this function
	swap ZL			; divide by 16
	andi ZL, 0x0f
	breq rxc_error		; not enough difference between pulse widths
	inc PARAM3
	mov PDIFF, PARAM3
	rjmp icp1recvdone	; done calibrating
	;; got a rising edge (measure the pulse width)
	;; The width is the 16-bit difference of the two capture times,
	;; divided by four; only its low byte (PARAM3) is used afterwards.
icp1recvhi:
	in PARAM3, ICR1L
	in 16, ICR1H
	sub PARAM3, PFLO	; calculate the pulse width
	sbc 16, PFHI
	lsr 16
	ror PARAM3
	lsr 16
	ror PARAM3		; PARAM3 scaled to CK/4 sample rate
	tst PDIFF
	brne icp1recvd		; already calibrated -> receive it
	tst PMIN
	brne icp1recvmax	; minimum known: this pulse is the maximum
	;; got the minimum pulse width in PARAM3
	mov PMIN, PARAM3	; store the minimum pulse width
	rjmp icp1recvdone

	;; Command dispatch for commands 6 to 8 (revision 4 and later).
	;; Continues the decision tree of rxc_cmdok; the command number is in
	;; register 16 and NPARAM has been set to 2 by cmd4_7.
	;; NOTE(review): presumably located here, away from the rest of the
	;; dispatcher, to stay within reach of the conditional branches that
	;; target it -- confirm before moving any code around it.
	.if REVISION >= 4
cmd6_7:	;; 6 or 7
	sbrs 16, 0
	rjmp recvcal		; command 6
	;; command 7
	ldi 16, 2		; TAP operation
	mov PTAP, 16		; PTAP.0=0 (no EOF), PTAP.1=1 (TAP playback)
	clr PARAM0		; pause pulse width
	clr NPARAM		; no parameters for command 7
	rjmp tap
cmd8_15:;; 8 to 15
	;; command 8
	ldi ZH, 4
	mov NPARAM, ZH		; 4 parameters, as for load
	mov PTAP, ZH		; PTAP.2=1 (packed load operation)
	rjmp load
	.endif

	;; Decode a measured data pulse (width in PARAM3) to a nibble.
	;; The offset of the width from PMIN (less one, for jitter) is located
	;; within 0..PDIFF-1 by a four-step binary search; each step that
	;; finds the value in the upper half adds its weight (8, 4, 2, 1) to
	;; ZL.  A width below the minimum gives the nibble 0.
icp1recvd:
	;; quantize a data pulse to a nibble (in ZL)
	ldi ZL, 0xf0
	dec PARAM3		; allow some jitter
	sub PARAM3, PMIN
	brlo icp1recvn
	ldi ZH, 16		; number of alternatives
	mov PCMP, PDIFF		; indirectly set the maximum value (PSMAX)
	clr PARAM2		; minimum value

	;; match the pulse in a binary search
icp1recvdl:
	dec PCMP
	mov PSMAX, PCMP		; alter the upper bound
icp1recvdb:
	lsr ZH			; weight of the next decision
	breq icp1recvn		; finished the search
	add PCMP, PARAM2
	ror PCMP		; PCMP = (PSMAX + PARAM2) / 2
	cp PARAM3, PCMP
	brlo icp1recvdl
	;; at least that much: add the decision value
	add ZL, ZH
	inc PCMP
	mov PARAM2, PCMP	; alter the lower bound
	mov PCMP, PSMAX
	rjmp icp1recvdb

	;; received a nibble in ZL (0xf0..0xff)
	;; PDATA == 0 means that this is the first nibble of an octet; it is
	;; kept in PDATA, which is nonzero from then on because of the 0xf0.
	;; The second nibble becomes the high nibble of the octet.
icp1recvn:
	tst PDATA
	brne icp1recvb
	mov PDATA, ZL
	rjmp icp1recvdone
icp1recvb:
	swap ZL			; second nibble to bits 4..7, 0x0f below it
	and PDATA, ZL		; combine the two nibbles
	sbis USR, 5
	rjmp .-4		; wait for UART Data Register Empty (DRE)
	out UDR, PDATA		; output the data
	clr PDATA

	;; 16-bit decrement of PARAM1:PARAM0 (PDATA is zero here)
	sec
	sbc PARAM0, PDATA	; decrement the number of bytes to receive
	sbc PARAM1, PDATA
	brcs icp1finish		; exit if no more bytes to receive

	;; request the next pulse and return from the interrupt
icp1recvdone:
	clr ZL
	cbi PORTB, 3		; drop READ to initiate next transfer
	ldi 16, 0x09
	out TCCR1B, 16		; activate ICP on falling edge, CK/1
	reti

	;; finished sending all bytes
icp1finishsend:
	ldi ZL, 0x40		; send '@' as an "end of transfer" flag
	out UDR, ZL
	;; finished receiving or sending all bytes
	;; Raises READ, clears the transfer state (including the send mode
	;; flag in NPARAM), stops timer 1 and enters idle mode.  The stack
	;; pointer is reset, so the interrupted context is abandoned.
icp1finish:
	sbi PORTB, 3		; raise READ
	clr PDATA
	clr NPARAM
	ldi ZL, stacke-1
	out SPL, ZL		; restore the stack pointer
	clr ZL			; ZL below rxbuf: rxc expects commands again
	clr YH
	out TIMSK, YH		; disable ICP and OC interrupts
	out TCCR1A, YH		; timer 1 disconnected from OC1 output
	out TCCR1B, YH		; timer 1 stopped
	out TCNT1H, YH		; clear timer 1 counter
	out TCNT1L, YH		; (16 bits)
	rjmp idle

	;; output compare: stop the timer
	;; Interrupt handler for the timer 1 compare match.  Bit 7 of TCCR1A
	;; is set only while the load code has the timer connected to the OC1
	;; output; in that case the next pulse is set up at oc1_load.
	;; In the other modes the timer is stopped by clearing the three
	;; lowest bits of TCCR1B.  Clobbers register 16.
oc1:	in 16, TCCR1A
	sbrc 16, 7
	rjmp oc1_load		; 'load' operation: load a pulse
	;; stop the timer and exit the interrupt
oc1_stop:
	in 16, TCCR1B
	andi 16, 0xf8
	out TCCR1B, 16
	reti

	;; input capture (cassette write capture)
	;; Interrupt handler for edges on the capture input.  TCCR1B is read
	;; into PARAM3; bit 3 of it is clear only in the custom send mode
	;; (see "send").  All other modes continue at icp1nosend.
	;; Custom send: each captured edge transfers one bit of PDATA on
	;; READ, least significant bit first, and flips the edge that is
	;; captured next.  A negative PSCNT means that a new octet has to be
	;; fetched from the buffer first (icp1send0).
icp1:	in PARAM3, TCCR1B	; PARAM3 is otherwise unused during these ops
	sbrc PARAM3, 3
	rjmp icp1nosend
	;; input capture (custom send function)
	sbrc PSCNT, 7
	rjmp icp1send0		; set up next data byte (PSCNT was negative)
	sbi PORTB, 3		; raise READ (1+2+2+2=7 cycles after request)
	ldi YH, 0x40
	eor PARAM3, YH
	out TCCR1B, PARAM3	; trigger on the opposite edge of WRITE
	clr YH
	dec PSCNT		; decrement the bit count
	brmi icp1send8		; all done (all bits sent)
	sbrs PDATA, 0		; read the data bit
	cbi PORTB, 3		; lower READ (7+9 cycles after req)
	lsr PDATA		; shift the data register
	reti			; return from interrupt
	;; all bits sent: decrement and compare the byte counter
	;; (16-bit decrement of PARAM1:PARAM0; YH is zero)
icp1send8:
	sec
	sbc PARAM0, YH		; decrement the number of bytes to send
	sbc PARAM1, YH
	brcs icp1finishsend	; all bytes sent: switch to idle mode
	reti

	;; prepare for sending a byte
	;; With an empty buffer the sender stops itself by disabling the
	;; capture interrupt; rxc restarts it when more data arrives.
	;; Otherwise the next octet is taken from the buffer, the host is
	;; allowed to send again if enough of the buffer is free, and the
	;; transfer is started by lowering READ.  YH is used as a scratch
	;; register and is zero again on exit.
icp1send0:
	cpse YL, ZL		; buffer empty?
	rjmp icp1send_nonempty
	out TIMSK, YH		; no more data -> disable input capture
	reti
icp1send_nonempty:
	ld PDATA, Y+		; load a data byte
	cpi YL, rxbufe
	brne icp1send_nowrap
	ldi YL, rxbuf		; wrap the buffer pointer
icp1send_nowrap:
	;; YH = number of free buffer slots
	mov YH, YL		; dequeuing pointer
	sub YH, ZL		; enqueuing pointer
	dec YH
	sbrc YH, 7
	subi YH, rxbuf-rxbufe	; reduce the difference modulo the buffer
	cpi YH, (rxbufe-rxbuf)*3/4
	brlo icp1send_nodc1	; buffer more than 25% full
	cts sbis
	rjmp icp1send_nodc1	; exit if receiving was previously enabled
	ldi YH, 0x11		; DC1, ctrl-q, permission to send
	out UDR, YH
	cts cbi			; drop CTS (start receiving)
icp1send_nodc1:
	cbi PORTB, 3		; lower READ to initiate the transfer
	ldi YH, 0x42		; activate ICP on rising edge, no CTC1, CK/8
	out TCCR1B, YH
	ldi YH, 8
	mov PSCNT, YH		; initialize the bit counter
	clr YH
	reti

	;; input capture (cassette write; other than the custom send function)
	;; PARAM3 holds the value of TCCR1B.  Bit 7 of it is set only in the
	;; pulse width measurement and save modes; the custom receive modes
	;; continue at icp1recv.
	;; The timer is cleared and restarted for the next pulse, and the low
	;; byte of the captured count is the width of the pulse that just
	;; ended.  With PARAM0 == 0 (pulse width measurement) that byte is
	;; sent as such; otherwise (save) it is converted to 'A'..'D' first.
	;; A captured count of zero is reported as a pause.
	;; NOTE(review): the comparisons below are strict (brlo), i.e.
	;; 'B' for width < PARAM0 and 'C' for width < PARAM1, while the file
	;; header documents "<=" -- confirm which one is intended.
icp1nosend:
	sbrs PARAM3, 7
	rjmp icp1recv
	out TCNT1H, YH		; clear timer 1 counter
	out TCNT1L, YH		; (16 bits)
	ldi 16, 0xcb		; activate ICP on rising edge, ICNC, CK/64
	out TCCR1B, 16
	in 16, ICR1L		; read the input
	tst PARAM0
	breq icp1pw		; pulse width measurement
	;; save mode (quantize the pulse widths)
	mov ZH, 16
	ldi 16, 0x41		; use signals 'A','B','C','D'
	tst ZH
	breq icp1pw		; pause detected
	inc 16
	cp ZH, PARAM0
	brlo icp1pw		; short pulse
	inc 16
	cp ZH, PARAM1
	brlo icp1pw		; medium pulse
	inc 16			; long pulse
	;; fall through
	;; send the octet in register 16 to the host and return
icp1pw:	sbis USR, 5		; pulse width measurement
	rjmp icp1pw		; wait for UART Data Register Empty (DRE)
	out UDR, 16		; send out the pulse width
	reti

	;; output compare for load operation: get next pulse
	;; Entered from oc1 with the value of TCCR1A in register 16.
	;; The timer alternates between two settings of TCCR1A:
	;; - 0xc0 (bit 6 set, OC1 is raised on the next match): the first
	;;   half of a pulse is being timed.  The handler switches to 0x80 and
	;;   programs the width of the second half (here).
	;; - 0x80 (bit 6 clear, OC1 is lowered on the next match): the second
	;;   half is being timed.  The handler switches to 0xc0 and programs
	;;   the first half of the next pulse (oc1_next).
	;; Widths are looked up from PARAM0..PARAM3 by loading through Z with
	;; ZL set to the 2-bit index; ZL is saved in YH meanwhile, and YH is
	;; cleared again afterwards.
oc1_load:
	sbrs 16, 6
	rjmp oc1_next
	ldi 16, 0x80
	out TCCR1A, 16		; lower OC1 on the following interrupt
	.if REVISION >= 2
	.if REVISION >= 3
	.if REVISION >= 4
	sbrc PTAP, 2
	rjmp oc1_pload		; PTAP.2 set: packed load operation
	.endif
	sbrc PTAP, 1
	rjmp oc1_tap		; PTAP.1 set: TAP playback
	.endif
	mov YH, ZL		; load the next pulse
	mov ZL, PWIDTH
oc1_load_shifted:
	lsr ZL
	lsr ZL			; get bits 2..3 of the current pulse
oc1_load_out:
	ld 16, Z		; look up the pulse width from PARAM0..PARAM3
	mov ZL, YH
	clr YH
	out OCR1AH, YH
	out OCR1AL, 16		; write the timer
	reti
	.if REVISION >= 4
	;; packed load: second half-pulse of a pair.  The index is in bits
	;; 2..3 of PWIDTH; PWIDTH is then nibble-swapped so that the other
	;; half of the octet is in the low nibble for the next pair.
oc1_pload:
	ldi YH, 2
	eor PTAP, YH		; toggle PTAP.1, the nibble counter
	mov YH, ZL
	mov ZL, PWIDTH
	swap PWIDTH		; swap the high and low nibble
	andi ZL, 12
	rjmp oc1_load_shifted
	.endif
	.if REVISION >= 3
	;; TAP playback: the second half has the same width as the first
oc1_tap:
	out OCR1AH, YH
	out OCR1AL, PWIDTH
	reti
	.endif
	.else
	reti
	.endif
	;; Set up the first half of the next pulse.
oc1_next:
	.if REVISION >= 4
	;; PTAP == 6: packed load with the nibble counter set, i.e. the
	;; second pair of half-pulses of the current octet is still to be
	;; played; no new octet is dequeued for it
	ldi 16, 6
	cpse PTAP, 16
	rjmp oc1_dequeue
	;; packed load operation, play back next half-pulse
	ldi 16, 0xc0
	out TCCR1A, 16		; raise OC1 on the following interrupt
	mov YH, ZL
	mov ZL, PWIDTH
	andi ZL, 3
	rjmp oc1_load_out
oc1_dequeue:
	.endif
	cpse YL, ZL		; buffer empty?
	rjmp oc1_nonempty
	.if REVISION >= 3
	;; Empty buffer: go idle with command 0 if this is a packed load or
	;; if the end of the stream has been signalled; otherwise just stop
	;; the timer until more data arrives.
	clr 16
	.if REVISION >= 4
	sbrs PTAP, 2		; end of packed load stream: go idle
	.endif
	sbrc PTAP, 0		; got NUL byte in rxc_pulse?
	rjmp rxc_cmdok		; end of stream: send '0' and go idle
	.endif
	rjmp oc1_stop		; no more data -> stop the timer
oc1_nonempty:
	ldi 16, 0xc0
	out TCCR1A, 16		; raise OC1 on the following interrupt
	.if REVISION >= 2
	ld PWIDTH, Y+		; load the next pulse
	mov 16, PWIDTH
	.if REVISION >= 3
	sbrc PTAP, 1
	rjmp oc1_noconv		; TAP operation: no pulse width conversion
	.endif
	andi 16, 3		; get bits 0..1
	mov YH, ZL		; convert the pulse
	mov ZL, 16
	ld 16, Z		; look up the pulse width from PARAM0..PARAM3
	mov ZL, YH
	clr YH
oc1_noconv:
	.else
	ld 16, Y+
	.endif
	out OCR1AH, YH
	out OCR1AL, 16		; write the timer
	cpi YL, rxbufe
	brne oc1_next_nowrap
	ldi YL, rxbuf		; wrap the buffer pointer
oc1_next_nowrap:
	;; ZH = number of free buffer slots; let the host send again when
	;; enough of the buffer is free and receiving had been stopped
	mov ZH, YL		; dequeuing pointer
	sub ZH, ZL		; enqueuing pointer
	dec ZH
	sbrc ZH, 7
	subi ZH, rxbuf-rxbufe	; reduce the difference modulo the buffer
	cpi ZH, (rxbufe-rxbuf)*3/4
	brlo oc1_ret		; buffer more than 25% full
	cts sbis
	reti			; exit if receiving was previously enabled
	ldi 16, 0x11		; DC1, ctrl-q, permission to send
	out UDR, 16
	cts cbi			; drop CTS (start receiving)
oc1_ret:
	reti

; Local variables:
; compile-command: "avr-as c2n232.s && objcopy -O srec a.out && cisp -c c2n232 /dev/ttyS0 -e -l a.out -v a.out"
; End:
