Thread: [Linux1394-cvslog] rev 522 - trunk
Brought to you by:
aeb,
bencollins
From: SVN U. <hog...@us...> - 2002-07-15 16:37:50
|
Author: hogsberg Date: Sun, 14 Jul 2002 16:19:40 -0400 New Revision: 522 Modified: trunk/amdtp.c trunk/amdtp.h Log: New amdtp features: blocking transmission mode, sample rates up to 192kHz. Modified: trunk/amdtp.c ============================================================================== --- trunk/amdtp.c (original) +++ trunk/amdtp.c Mon Jul 15 12:37:37 2002 @@ -32,7 +32,7 @@ * That is outside the scope of this driver, and furthermore it is not * really standardized yet. * - * The Audio and Music Data Tranmission Protocol is avaiable at + * The Audio and Music Data Tranmission Protocol is available at * * http://www.1394ta.org/Download/Technology/Specifications/2001/AM20Final-jf2.pdf * @@ -89,9 +89,13 @@ #define FMT_AMDTP 0x10 #define FDF_AM824 0x00 -#define FDF_SFC_32KHZ 0x00 /* 32kHz */ -#define FDF_SFC_44K1HZ 0x01 /* 44.1kHz */ -#define FDF_SFC_48KHZ 0x02 /* 44.1kHz */ +#define FDF_SFC_32KHZ 0x00 +#define FDF_SFC_44K1HZ 0x01 +#define FDF_SFC_48KHZ 0x02 +#define FDF_SFC_88K2HZ 0x03 +#define FDF_SFC_96KHZ 0x04 +#define FDF_SFC_176K4HZ 0x05 +#define FDF_SFC_192KHZ 0x06 struct descriptor_block { struct output_more_immediate { @@ -113,15 +117,42 @@ struct packet { struct descriptor_block *db; dma_addr_t db_bus; - quadlet_t *payload; + struct iso_packet *payload; dma_addr_t payload_bus; }; +#ifdef __BIG_ENDIAN_BITFIELD + +#error Big endian bitfields not tested + +#else + +struct iso_packet { + /* First quadlet */ + unsigned int sid : 6; + unsigned int eoh0 : 2; + unsigned int dbs : 8; + unsigned int reserved : 2; + unsigned int sph : 1; + unsigned int qpc : 3; + unsigned int fn : 2; + unsigned int dbc : 8; + + /* Second quadlet */ + unsigned int fmt : 6; + unsigned int eoh1 : 2; + unsigned int fdf : 8; + unsigned int syt : 16; + + quadlet_t data[0]; +}; + +#endif + struct fraction { int integer; int numerator; int denominator; - int counter; }; #define PACKET_LIST_SIZE 256 @@ -148,6 +179,8 @@ int rate; int dimension; int fdf; + int mode; + int sample_format; struct cmp_pcr *opcr; /* Input samples are copied here. */ @@ -157,7 +190,7 @@ unsigned char dbc; struct packet_list *current_packet_list; int current_packet; - struct fraction packet_size_fraction; + struct fraction ready_samples, samples_per_cycle; /* We use these to generate control bits when we are packing * iec958 data. @@ -176,8 +209,7 @@ * written back in the dma programs. */ atomic_t cycle_count, cycle_count2; - int cycle_offset; - struct fraction syt_fraction; + struct fraction cycle_offset, ticks_per_syt_offset; int syt_interval; int stale_count; @@ -255,7 +287,7 @@ OHCI1394_CONTEXT_WAKE); } -void ohci1394_stop_it_ctx(struct ti_ohci *ohci, int ctx) +void ohci1394_stop_it_ctx(struct ti_ohci *ohci, int ctx, int synchronous) { u32 control; int wait; @@ -265,13 +297,15 @@ OHCI1394_CONTEXT_RUN); wmb(); - for (wait = 0; wait < 5; wait++) { - control = reg_read(ohci, OHCI1394_IsoXmitContextControlSet + ctx * 16); - if ((control & OHCI1394_CONTEXT_ACTIVE) == 0) - break; - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + if (synchronous) { + for (wait = 0; wait < 5; wait++) { + control = reg_read(ohci, OHCI1394_IsoXmitContextControlSet + ctx * 16); + if ((control & OHCI1394_CONTEXT_ACTIVE) == 0) + break; + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } } } @@ -297,6 +331,22 @@ return pl; } +static void stream_start_dma(struct stream *s, struct packet_list *pl) +{ + u32 syt, cycle_count; + + cycle_count = reg_read(s->host->host->hostdata, + OHCI1394_IsochronousCycleTimer) >> 12; + syt = be16_to_cpu(pl->packets[0].payload->syt) & 0x0f; + cycle_count = (cycle_count & ~0x0f) + 32 + syt; + if ((cycle_count & 0x1fff) >= 8000) + cycle_count = cycle_count - 8000 + 0x2000; + + ohci1394_start_it_ctx(s->host->ohci, s->iso_context, + pl->packets[0].db_bus, 3, + cycle_count & 0x7fff); +} + static void stream_put_dma_packet_list(struct stream *s, struct packet_list *pl) { @@ -317,20 +367,8 @@ last->db->payload_desc.branch = pl->packets[0].db_bus | 3; ohci1394_wake_it_ctx(s->host->ohci, s->iso_context); } - else { - u32 syt, cycle_count; - - cycle_count = reg_read(s->host->host->hostdata, - OHCI1394_IsochronousCycleTimer) >> 12; - syt = (pl->packets[0].payload[1] >> 12) & 0x0f; - cycle_count = (cycle_count & ~0x0f) + 32 + syt; - if ((cycle_count & 0x1fff) >= 8000) - cycle_count = cycle_count - 8000 + 0x2000; - - ohci1394_start_it_ctx(s->host->ohci, s->iso_context, - pl->packets[0].db_bus, 3, - cycle_count & 0x7fff); - } + else + stream_start_dma(s, pl); } static void stream_shift_packet_lists(struct stream *s) @@ -430,17 +468,42 @@ f->integer = numerator / denominator; f->numerator = numerator % denominator; f->denominator = denominator; - f->counter = 0; } -static int fraction_next_size(struct fraction *f) +static __inline__ void fraction_add(struct fraction *dst, + struct fraction *src1, + struct fraction *src2) +{ + /* assert: src1->denominator == src2->denominator */ + + int sum, denom; + + /* We use these two local variables to allow gcc to optimize + * the division and the modulo into only one division. */ + + sum = src1->numerator + src2->numerator; + denom = src1->denominator; + dst->integer = src1->integer + src2->integer + sum / denom; + dst->numerator = sum % denom; + dst->denominator = denom; +} + +static __inline__ void fraction_sub_int(struct fraction *dst, + struct fraction *src, int integer) { - return f->integer + ((f->counter + f->numerator) / f->denominator); + dst->integer = src->integer - integer; + dst->numerator = src->numerator; + dst->denominator = src->denominator; } -static void fraction_inc(struct fraction *f) +static __inline__ int fraction_floor(struct fraction *frac) { - f->counter = (f->counter + f->numerator) % f->denominator; + return frac->integer; +} + +static __inline__ int fraction_ceil(struct fraction *frac) +{ + return frac->integer + (frac->numerator > 0 ? 1 : 0); } static void amdtp_irq_handler(int card, quadlet_t isoRecvIntEvent, @@ -633,17 +696,33 @@ } } +static void fill_payload_le16(struct stream *s, quadlet_t *data, int nevents) +{ + quadlet_t *event, sample, bits; + unsigned char *p; + int i, j; + + for (i = 0, event = data; i < nevents; i++) { + + for (j = 0; j < s->dimension; j++) { + p = buffer_get_bytes(s->input, 2); + sample = (p[1] << 16) | (p[0] << 8); + bits = get_header_bits(s, j, sample); + event[j] = cpu_to_be32((bits << 24) | sample); + } + + event += s->dimension; + if (++s->iec958_frame_count == 192) + s->iec958_frame_count = 0; + } +} static void fill_packet(struct stream *s, struct packet *packet, int nevents) { - int size, node_id, i, j; - quadlet_t *event; - unsigned char *p; - u32 control, sample, bits; - int syt_index, syt, next; + int syt_index, syt, size; + u32 control; size = (nevents * s->dimension + 2) * sizeof(quadlet_t); - node_id = s->host->host->node_id & 0x3f; /* Update DMA descriptors */ packet->db->payload_desc.status = 0; @@ -656,41 +735,46 @@ (s->iso_channel << 8) | (TCODE_ISO_DATA << 4); packet->db->header_desc.header[1] = size << 16; - /* Fill cip header */ - syt_index = s->dbc & (s->syt_interval - 1); - if (syt_index == 0 || syt_index + nevents > s->syt_interval) { + /* Calculate synchronization timestamp (syt). First we + * determine syt_index, that is, the index in the packet of + * the sample for which the timestamp is valid. */ + syt_index = (s->syt_interval - s->dbc) & (s->syt_interval - 1); + if (syt_index < nevents) { syt = ((atomic_read(&s->cycle_count) << 12) | - s->cycle_offset) & 0xffff; - next = fraction_next_size(&s->syt_fraction) + s->cycle_offset; - /* This next addition should be modulo 8000 (0x1f40), - * but we only use the lower 4 bits of cycle_count, so - * we dont need the modulo. */ - atomic_add(next / 3072, &s->cycle_count); - s->cycle_offset = next % 3072; - fraction_inc(&s->syt_fraction); + s->cycle_offset.integer) & 0xffff; + fraction_add(&s->cycle_offset, + &s->cycle_offset, &s->ticks_per_syt_offset); + if (s->cycle_offset.integer >= 3072) { + /* This next addition should be modulo 8000 + * (0x1f40), but we only use the lower 4 bits + * of cycle_count, so we dont need the modulo. */ + atomic_add(s->cycle_offset.integer / 3072, &s->cycle_count); + s->cycle_offset.integer %= 3072; + } } - else { + else syt = 0xffff; - next = 0; - } - atomic_inc(&s->cycle_count2); - - packet->payload[0] = cpu_to_be32((node_id << 24) | (s->dimension << 16) | s->dbc); - packet->payload[1] = cpu_to_be32((1 << 31) | (FMT_AMDTP << 24) | (s->fdf << 16) | syt); - - /* Fill payload */ - for (i = 0, event = &packet->payload[2]; i < nevents; i++) { - for (j = 0; j < s->dimension; j++) { - p = buffer_get_bytes(s->input, 2); - sample = (p[1] << 16) | (p[0] << 8); - bits = get_header_bits(s, j, sample); - event[j] = cpu_to_be32((bits << 24) | sample); - } - - event += s->dimension; - if (++s->iec958_frame_count == 192) - s->iec958_frame_count = 0; + atomic_inc(&s->cycle_count2); + + /* Fill cip header */ + packet->payload->eoh0 = 0; + packet->payload->sid = s->host->host->node_id & 0x3f; + packet->payload->dbs = s->dimension; + packet->payload->fn = 0; + packet->payload->qpc = 0; + packet->payload->sph = 0; + packet->payload->reserved = 0; + packet->payload->dbc = s->dbc; + packet->payload->eoh1 = 2; + packet->payload->fmt = FMT_AMDTP; + packet->payload->fdf = s->fdf; + packet->payload->syt = cpu_to_be16(syt); + + switch (s->sample_format) { + case AMDTP_INPUT_LE16: + fill_payload_le16(s, packet->payload->data, nevents); + break; } s->dbc += nevents; @@ -700,13 +784,44 @@ { struct packet *p; int nevents; + struct fraction next; + + /* The AMDTP specifies two transmission modes: blocking and + * non-blocking. In blocking mode you always transfer + * syt_interval or zero samples, whereas in non-blocking mode + * you send as many samples as you have available at transfer + * time. + * + * The fraction samples_per_cycle specifies the number of + * samples that become available per cycle. We add this to + * the fraction ready_samples, which specifies the number of + * leftover samples from the previous transmission. The sum, + * stored in the fraction next, specifies the number of + * samples available for transmission, and from this we + * determine the number of samples to actually transmit. + */ + + while (1) { + fraction_add(&next, &s->ready_samples, &s->samples_per_cycle); + if (s->mode == AMDTP_MODE_BLOCKING) { + if (fraction_floor(&next) >= s->syt_interval) + nevents = s->syt_interval; + else + nevents = 0; + } + else + nevents = fraction_floor(&next); + + p = stream_current_packet(s); + if (s->input->length < nevents * s->dimension * 2 || p == NULL) + break; - while (nevents = fraction_next_size(&s->packet_size_fraction), - p = stream_current_packet(s), - nevents * s->dimension * 2 <= s->input->length && p != NULL) { fill_packet(s, p, nevents); - fraction_inc(&s->packet_size_fraction); stream_queue_packet(s); + + /* Now that we have successfully queued the packet for + * transmission, we update the fraction ready_samples. */ + fraction_sub_int(&s->ready_samples, &next, nevents); } } @@ -714,9 +829,10 @@ { int max_nevents, max_packet_size, i; - max_nevents = s->packet_size_fraction.integer; - if (s->packet_size_fraction.numerator > 0) - max_nevents++; + if (s->mode == AMDTP_MODE_BLOCKING) + max_nevents = s->syt_interval; + else + max_nevents = fraction_ceil(&s->samples_per_cycle); max_packet_size = max_nevents * s->dimension * 4 + 8; s->packet_pool = pci_pool_create("packet pool", s->host->ohci->dev, @@ -763,15 +879,20 @@ plug->p2p_count, plug->channel); s->iso_channel = plug->channel; if (plug->p2p_count > 0) { - /* start streaming */ + struct packet_list *pl; + + pl = list_entry(s->dma_packet_lists.next, struct packet_list, link); + stream_start_dma(s, pl); } else { - /* stop streaming */ + ohci1394_stop_it_ctx(s->host->ohci, s->iso_context, 0); } } static int stream_configure(struct stream *s, int cmd, struct amdtp_ioctl *cfg) { + const int transfer_delay = 3000; + if (cfg->format <= AMDTP_FORMAT_IEC958_AC3) s->format = cfg->format; else @@ -782,32 +903,59 @@ s->syt_interval = 8; s->fdf = FDF_SFC_32KHZ; s->iec958_rate_code = 0x0c; - s->rate = cfg->rate; break; case 44100: s->syt_interval = 8; s->fdf = FDF_SFC_44K1HZ; s->iec958_rate_code = 0x00; - s->rate = cfg->rate; break; case 48000: s->syt_interval = 8; s->fdf = FDF_SFC_48KHZ; s->iec958_rate_code = 0x04; - s->rate = cfg->rate; break; + case 88200: + s->syt_interval = 16; + s->fdf = FDF_SFC_88K2HZ; + s->iec958_rate_code = 0x00; + break; + case 96000: + s->syt_interval = 16; + s->fdf = FDF_SFC_96KHZ; + s->iec958_rate_code = 0x00; + break; + case 176400: + s->syt_interval = 32; + s->fdf = FDF_SFC_176K4HZ; + s->iec958_rate_code = 0x00; + break; + case 192000: + s->syt_interval = 32; + s->fdf = FDF_SFC_192KHZ; + s->iec958_rate_code = 0x00; + break; + default: return -EINVAL; } - fraction_init(&s->packet_size_fraction, s->rate, 8000); + s->rate = cfg->rate; + fraction_init(&s->samples_per_cycle, s->rate, 8000); + fraction_init(&s->ready_samples, 0, 8000); - /* The syt_fraction is initialized to the number of ticks - * between syt_interval events. The number of ticks per + /* The ticks_per_syt_offset is initialized to the number of + * ticks between syt_interval events. The number of ticks per * second is 24.576e6, so the number of ticks between * syt_interval events is 24.576e6 * syt_interval / rate. */ - fraction_init(&s->syt_fraction, 24576000 * s->syt_interval, s->rate); + fraction_init(&s->ticks_per_syt_offset, + 24576000 * s->syt_interval, s->rate); + fraction_init(&s->cycle_offset, (transfer_delay % 3072) * s->rate, s->rate); + atomic_set(&s->cycle_count, transfer_delay / 3072); + atomic_set(&s->cycle_count2, 0); + + s->mode = cfg->mode; + s->sample_format = AMDTP_INPUT_LE16; /* When using the AM824 raw subformat we can stream signals of * any dimension. The IEC958 subformat, however, only @@ -858,7 +1006,6 @@ { struct stream *s; unsigned long flags; - const int transfer_delay = 8651; /* approx 352 us */ s = kmalloc(sizeof(struct stream), SLAB_KERNEL); if (s == NULL) @@ -873,10 +1020,6 @@ return NULL; } - s->cycle_offset = transfer_delay % 3072; - atomic_set(&s->cycle_count, transfer_delay / 3072); - atomic_set(&s->cycle_count2, 0); - s->descriptor_pool = pci_pool_create("descriptor pool", host->ohci->dev, sizeof(struct descriptor_block), 16, 0, SLAB_KERNEL); @@ -920,7 +1063,7 @@ wait_event_interruptible(s->packet_list_wait, list_empty(&s->dma_packet_lists)); - ohci1394_stop_it_ctx(s->host->ohci, s->iso_context); + ohci1394_stop_it_ctx(s->host->ohci, s->iso_context, 1); ohci1394_free_it_ctx(s->host->ohci, s->iso_context); if (s->opcr != NULL) @@ -996,8 +1139,8 @@ case AMDTP_IOC_PING: HPSB_INFO("ping: offsetting timpestamps %ld ticks", arg); - new = s->cycle_offset + arg; - s->cycle_offset = new % 3072; + new = s->cycle_offset.integer + arg; + s->cycle_offset.integer = new % 3072; atomic_add(new / 3072, &s->cycle_count); return 0; Modified: trunk/amdtp.h ============================================================================== --- trunk/amdtp.h (original) +++ trunk/amdtp.h Mon Jul 15 12:37:38 2002 @@ -19,13 +19,24 @@ * AMDTP_FORMAT_IEC958_AC3 will transmit the samples with the data bit * set, suitable for transmitting compressed AC-3 audio. * - * The rate field specifies the transmission rate; supported values are - * AMDTP_RATE_32KHZ, AMDTP_RATE_44K1HZ and AMDTP_RATE_48KHZ. + * The rate field specifies the transmission rate; supported values + * are 32000, 44100, 48000, 88200, 96000, 176400 and 192000. * * The dimension field specifies the dimension of the signal, that is, * the number of audio channels. Only AMDTP_FORMAT_RAW supports * settings greater than 2. * + * The mode field specifies which transmission mode to use. The AMDTP + * specifies two different transmission modes: blocking and + * non-blocking. The blocking transmission mode always send a fixed + * number of samples, typically 8, 16 or 32. To exactly match the + * transmission rate, the driver alternates between sending empty and + * non-empty packets. In non-blocking mode, the driver transmits as + * small packets as possible. For example, for a transmission rate of + * 44100Hz, the driver should send 5 41/80 samples in every cycle, but + * this is not possible so instead the driver alternates between + * sending 5 and 6 samples. + * * The last thing to specify is either the isochronous channel to use * or the output plug to connect to. If you know what channel the * destination device will listen on, you can specify the channel @@ -60,15 +71,20 @@ }; enum { - AMDTP_RATE_32KHZ, - AMDTP_RATE_44K1HZ, - AMDTP_RATE_48KHZ, + AMDTP_MODE_BLOCKING, + AMDTP_MODE_NON_BLOCKING, +}; + +enum { + AMDTP_INPUT_LE16, + AMDTP_INPUT_BE16, }; struct amdtp_ioctl { __u32 format; __u32 rate; __u32 dimension; + __u32 mode; union { __u32 channel; __u32 plug; } u; }; |