From 9da222f4c4bce068b3c76fda28a5c3dbc6bb2442 Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Mon, 7 Oct 2024 20:23:49 -0700 Subject: [PATCH] Audio recording support, part 2 The demodulator code from the original CiderPress has been ported. Some of the setup works a little differently, but the signal processing code functions identically. --- CommonUtil/CassetteDecoder.cs | 589 ++++++++++++++++++++++++ CommonUtil/WAVFile.cs | 173 +++++-- DiskArc/Arc/AudioRecording-notes.md | 97 +++- DiskArc/Arc/AudioRecording.cs | 11 +- DiskArc/Arc/AudioRecording_FileEntry.cs | 2 +- 5 files changed, 831 insertions(+), 41 deletions(-) create mode 100644 CommonUtil/CassetteDecoder.cs diff --git a/CommonUtil/CassetteDecoder.cs b/CommonUtil/CassetteDecoder.cs new file mode 100644 index 0000000..e541f4f --- /dev/null +++ b/CommonUtil/CassetteDecoder.cs @@ -0,0 +1,589 @@ +/* + * Copyright 2024 faddenSoft + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Diagnostics; + +namespace CommonUtil { + /// + /// Decodes (demodulates) a sound file with Apple II cassette data into chunks. + /// The details of the format, and some notes on the approach we use here, are described in + /// DiskArc/Arc/AudioRecording-notes.md. + /// + /// + /// This is a fairly direct port of the code in the original CiderPress. See the + /// CassetteDialog.[h,cpp] + /// implementation. 
+ /// Two basic algorithms are used: measuring the distance between zero-crossings, and + /// measuring the distance between peaks. The former is what the Apple II does, and is + /// generally reliable, but it fails if the cassette has developed a DC bias or a large + /// low-frequency distortion that pushes the signal off the zero line. Measuring between + /// peaks is a little trickier because some recordings have two peaks (one lower than the + /// other) instead of one, so simply watching for a change in direction doesn't work. + /// + public class CassetteDecoder { + /// + /// Algorithm to use when analyzing samples. + /// + public enum Algorithm { + Unknown = 0, Zero, SharpPeak, RoundPeak, ShallowPeak + } + + /// + /// One chunk of data decoded from the sound stream. + /// + public class Chunk { + /// + /// Full file data, minus the final (checksum) byte. + /// + public byte[] Data { get; private set; } + + /// + /// Checksum value read from the data stream. + /// + public byte ReadChecksum { get; private set; } + + /// + /// Checksum value calculated from the data stream. This will be 0x00 if all is + /// well. If not, the data may be damaged (or, rarely, copy-protected). + /// + public byte CalcChecksum { get; private set; } + + /// + /// True if the checksum did not match. + /// + public bool BadChecksum => CalcChecksum != 0; + + /// + /// True if the file didn't end on a byte boundary. This suggests that a burst + /// of noise ended the process prematurely. + /// + public bool BadEnd { get; private set; } + + /// + /// Sample number of start of chunk in sound file. Useful when examining the sound + /// file with an editor. + /// + public int StartSample { get; private set; } + + /// + /// First sample past end of chunk in sound file. 
+ /// + public int EndSample { get; private set; } + + public Chunk(byte[] data, byte readChecksum, byte calcChecksum, bool badEnd, + int startSample, int endSample) { + Data = data; + ReadChecksum = readChecksum; + CalcChecksum = calcChecksum; + BadEnd = badEnd; + StartSample = startSample; + EndSample = endSample; + } + + public override string ToString() { + return Data.Length + " bytes, ex-sum=$" + ReadChecksum.ToString("x2") + + " act-sum=$" + CalcChecksum.ToString("x2") + ", badEnd=" + BadEnd + + " start=" + StartSample +", end=" + EndSample; + } + } + + // + // Innards. + // + + private WAVFile mWavFile; + private Algorithm mAlg; + + private List mChunks = new List(); + + private const int BUFFER_SIZE = 65536; // must be mult of 4, in case of 16-bit stereo + private const int MAX_FILE_LEN = 512 * 1024; // 512KB is a ~45 minute recording + + // Width of 1/2 cycle in 770Hz lead-in (1000000 / 770 / 2). + private const float LEAD_IN_HALF_WIDTH_USEC = 650.0f; + // Max error when detecting 770Hz lead-in, allows [542,758] usec + private const float LEAD_IN_MAX_ERROR_USEC = 108.0f; + // Width of 1/2 cycle of "short 0". + private const float SHORT_ZERO_HALF_WIDTH_USEC = 200.0f; + // Max error when detecting short 0 (allows [50,350] usec). + private const float SHORT_ZERO_MAX_ERROR_USEC = 150.0f; + // Width of 1/2 cycle of '0' (2kHz). + private const float ZERO_HALF_WIDTH_USEC = 250.0f; + // Max error when detecting '0'. + private const float ZERO_MAX_ERROR_USEC = 94.0f; + // Width of 1/2 cycle of '1' (1kHz). + private const float ONE_HALF_WIDTH_USEC = 500.0f; + // Max error when detecting '1'. + private const float ONE_MAX_ERROR_USEC = 94.0f; + // After this many 770Hz half-cycles, start looking for short 0. + private const int LEAD_IN_HALF_CYC_THRESHOLD = 1540; // 1 second + + // Amplitude must change by this much before we switch out of "peak" mode. 
+ private const float PEAK_THRESHOLD = 0.2f; // 10% + // Amplitude must change by at least this much to stay in "transition" mode. + private const float TRANS_MIN_DELTA = 0.02f; // 1% + // TRANS_MIN_DELTA happens over this range (1 sample at 22.05kHz). + private const float TRANS_DELTA_BASE_USEC = 43.35f; + + // Decoder state. + private enum State { + Unknown = 0, + ScanFor770Start, + Scanning770, + ScanForShort0, + Short0B, + ReadData, + EndReached + } + + // Bit decode state. + private enum Mode { + Unknown = 0, + Initial0, + Initial1, + InTransition, + AtPeak, + Running + } + + // Scan state. + private State mState; + private Mode mMode; + private bool mPositive; + + private int mLastZeroIndex; + private int mLastPeakStartIndex; + private float mLastPeakStartValue; + + private float mPrevSample; + + private float mHalfCycleWidthUsec; + private int mNum770; // # of consecutive 770Hz cycles + private int mDataStart; + private int mDataEnd; + + private float mUsecPerSample; // constant + + + /// + /// Private constructor. + /// + private CassetteDecoder(WAVFile waveFile, Algorithm alg) { + mWavFile = waveFile; + mAlg = alg; + } + + /// + /// Decodes a stream of audio samples into data chunks. + /// + /// Processed RIFF file with WAVE data (.wav). + /// List of chunks, in the order in which they appear in the sound file. + public static List DecodeFile(WAVFile wavFile, Algorithm alg) { + CassetteDecoder decoder = new CassetteDecoder(wavFile, alg); + decoder.Scan(); + return decoder.mChunks; + } + + /// + /// Scans the contents of the audio file, generating file chunks as it goes. 
+ /// + private void Scan() { + Debug.Assert(mWavFile.FormatTag == WAVFile.WAVE_FORMAT_PCM); + Debug.WriteLine("Scanning file: " + mWavFile.GetInfoString()); + + MemoryStream outStream = new MemoryStream(); + float[] sampleBuf = new float[16384]; + mUsecPerSample = 1000000.0f / mWavFile.SamplesPerSec; + + bool doInit = true; + byte checksum = 0; + int bitAcc = 0; + int curSampleIndex = 0; + + mWavFile.SeekToStart(); + while (true) { + int startSampleIndex = -1; + int count = mWavFile.GetSamples(sampleBuf, 0); + if (count == 0) { + break; // EOF reached + } else if (count == -1) { + // Whatever caused this should have been caught earlier. + throw new NotSupportedException("unable to get samples"); + } + + for (int i = 0; i < count; i++) { + if (doInit) { + mState = State.ScanFor770Start; + mMode = Mode.Initial0; + mPositive = false; + checksum = 0xff; + bitAcc = 1; + outStream.SetLength(0); + doInit = false; + } + + int bitVal; + bool gotBit; + switch (mAlg) { + case Algorithm.Zero: + gotBit = + ProcessSampleZero(sampleBuf[i], curSampleIndex + i, out bitVal); + break; + case Algorithm.SharpPeak: + case Algorithm.RoundPeak: + case Algorithm.ShallowPeak: + gotBit = + ProcessSamplePeak(sampleBuf[i], curSampleIndex + i, out bitVal); + break; + default: + throw new NotImplementedException("what is " + mAlg); + } + if (gotBit) { + // Shift the bit into the byte, and output it when we get 8 bits. + Debug.Assert(bitVal == 0 || bitVal == 1); + bitAcc = (bitAcc << 1) | bitVal; + if (bitAcc > 0xff) { + outStream.WriteByte((byte)bitAcc); + checksum ^= (byte)bitAcc; + bitAcc = 1; + } + + if (outStream.Length > MAX_FILE_LEN) { + // Something must be off. + Debug.WriteLine("Found enormous file on cassette, abandoning"); + mState = State.EndReached; + } + } + if (mState == State.Scanning770 && startSampleIndex < 0) { + startSampleIndex = curSampleIndex + i; + } + if (mState == State.EndReached) { + // Copy data and create the chunk object. 
+ Chunk chunk; + if (outStream.Length == 0) { + chunk = new Chunk(new byte[0], 0x00, 0xff, bitAcc != 1, + mDataStart, mDataEnd); + } else { + byte[] fileData = new byte[outStream.Length - 1]; + outStream.Position = 0; + outStream.ReadExactly(fileData, 0, fileData.Length); + int readChecksum = outStream.ReadByte(); + chunk = new Chunk(fileData, (byte)readChecksum, checksum, + bitAcc != 1, mDataStart, mDataEnd); + } + Debug.WriteLine("READ: " + chunk.ToString()); + mChunks.Add(chunk); + doInit = true; + } + } + + curSampleIndex += count; + } + + switch (mState) { + case State.ScanFor770Start: + case State.Scanning770: + case State.EndReached: + Debug.WriteLine("Scan ended in normal state: " + mState); + break; + default: + Debug.WriteLine("Scan ended in unexpected state: " + mState); + break; + } + } + + /// + /// Implements the zero-crossing algorithm. + /// + private bool ProcessSampleZero(float sample, int sampleIndex, out int bitVal) { + bitVal = 0; + + // Analyze the mode, changing to a new one when appropriate. + bool crossedZero = false; + switch (mMode) { + case Mode.Initial0: + Debug.Assert(mState == State.ScanFor770Start); + mMode = Mode.Running; + break; + case Mode.Running: + if (mPrevSample < 0.0f && sample >= 0.0f || + mPrevSample >= 0.0f && sample < 0.0f) { + crossedZero = true; + } + break; + default: + Debug.Assert(false); + break; + } + + + // Deal with a zero-crossing. + // + // We currently just grab the first point after we cross. We should + // be grabbing the closest point or interpolating across. + bool emitBit = false; + if (crossedZero) { + float halfCycleUsec; + int bias; + + if (Math.Abs(mPrevSample) < Math.Abs(sample)) { + bias = -1; // previous sample was closer to zero point + } else { + bias = 0; // current sample is closer + } + + // Delta time for zero-to-zero (half cycle). 
+ int timeDelta = (sampleIndex + bias) - mLastZeroIndex; + + halfCycleUsec = timeDelta * mUsecPerSample; + emitBit = UpdateState(sampleIndex + bias, halfCycleUsec, out bitVal); + mLastZeroIndex = sampleIndex + bias; + } + + // Remember this sample for the next go-round. + mPrevSample = sample; + + return emitBit; + } + + /// + /// Implements the peak-to-peak algorithm. + /// + private bool ProcessSamplePeak(float sample, int sampleIndex, out int bitVal) { + bitVal = 0; + float ampDelta; + float transitionLimit; + bool hitPeak = false; + bool emitBit = false; + + // Analyze the mode, changing to a new one when appropriate. + switch (mMode) { + case Mode.Initial0: + Debug.Assert(mState == State.ScanFor770Start); + mMode = Mode.Initial1; + break; + case Mode.Initial1: + Debug.Assert(mState == State.ScanFor770Start); + mPositive = (sample >= mPrevSample); + mMode = Mode.InTransition; + // Set these up with something reasonable. + mLastPeakStartIndex = sampleIndex; + mLastPeakStartValue = sample; + break; + case Mode.InTransition: + // Stay in this state until two adjacent samples are very close in + // amplitude (or we change direction). We need to adjust our amplitude + // threshold based on sampling frequency, or at higher sample rates + // we're going to think everything is a transition. + // + // The approach here is overly simplistic, and is prone to failure + // when the sampling rate is high, especially with 8-bit samples + // or sound cards that don't really have 16-bit resolution. The + // proper way to do this is to keep a short history, and evaluate + // the delta amplitude over longer periods. [At this point I'd + // rather just tell people to record at 22.05kHz.] + // + // Set the "hitPeak" flag and handle the consequence below.
+ if (mAlg == Algorithm.RoundPeak) { + transitionLimit = TRANS_MIN_DELTA * (mUsecPerSample / TRANS_DELTA_BASE_USEC); + } else { + transitionLimit = 0.0f; + } + + if (mPositive) { + if (sample < mPrevSample + transitionLimit) { + mMode = Mode.AtPeak; + hitPeak = true; + } + } else { + if (sample > mPrevSample - transitionLimit) { + mMode = Mode.AtPeak; + hitPeak = true; + } + } + break; + case Mode.AtPeak: + transitionLimit = PEAK_THRESHOLD; + if (mAlg == Algorithm.ShallowPeak) { + transitionLimit /= 4.0f; + } + + ampDelta = mLastPeakStartValue - sample; + if (ampDelta < 0) { + ampDelta = -ampDelta; + } + if (ampDelta > transitionLimit) { + if (sample >= mLastPeakStartValue) { + mPositive = true; // going up + } else { + mPositive = false; // going down + } + // Mark the end of the peak; could be same as start of peak. + mMode = Mode.InTransition; + } + break; + default: + throw new Exception("Bad mode " + mMode); + } + + // If we hit "peak" criteria, we regard the *previous* sample as the + // peak. This is very important for lower sampling rates (e.g. 8kHz). + if (hitPeak) { + // Delta time for peak-to-peak (half cycle). + int timeDelta = (sampleIndex - 1) - mLastPeakStartIndex; + // Amplitude peak-to-peak. + ampDelta = mLastPeakStartValue - mPrevSample; + if (ampDelta < 0) { + ampDelta = -ampDelta; + } + + float halfCycleUsec = timeDelta * mUsecPerSample; + + emitBit = UpdateState(sampleIndex - 1, halfCycleUsec, out bitVal); + + // Set the "peak start" values. + mLastPeakStartIndex = sampleIndex - 1; + mLastPeakStartValue = mPrevSample; + } + + // Remember this sample for the next go-round. + mPrevSample = sample; + + return emitBit; + } + + /// + /// Updates the state every half-cycle. + /// + /// Index of current sample. + /// Width, in usec, of current half-cycle. + /// Result: bit value we read (when returning true). + /// True if we want to output a bit. 
+ private bool UpdateState(int sampleIndex, float halfCycleUsec, out int bitVal) { + bitVal = 0; + bool emitBit = false; + + float fullCycleUsec; + if (mHalfCycleWidthUsec != 0.0f) { + fullCycleUsec = halfCycleUsec + mHalfCycleWidthUsec; + } else { + fullCycleUsec = 0.0f; // only have first half + } + + switch (mState) { + case State.ScanFor770Start: + // Watch for a cycle of the appropriate length. + if (fullCycleUsec != 0.0f && + fullCycleUsec > LEAD_IN_HALF_WIDTH_USEC * 2.0f - LEAD_IN_MAX_ERROR_USEC * 2.0f && + fullCycleUsec < LEAD_IN_HALF_WIDTH_USEC * 2.0f + LEAD_IN_MAX_ERROR_USEC * 2.0f) { + // Now scanning 770Hz samples. + mState = State.Scanning770; + mNum770 = 1; + } + break; + case State.Scanning770: + // Count up the 770Hz cycles. + if (fullCycleUsec != 0.0f && + fullCycleUsec > LEAD_IN_HALF_WIDTH_USEC * 2.0f - LEAD_IN_MAX_ERROR_USEC * 2.0f && + fullCycleUsec < LEAD_IN_HALF_WIDTH_USEC * 2.0f + LEAD_IN_MAX_ERROR_USEC * 2.0f) { + mNum770++; + if (mNum770 > LEAD_IN_HALF_CYC_THRESHOLD / 2) { + // Looks like a solid tone, advance to next phase. + mState = State.ScanForShort0; + Debug.WriteLine("# looking for short 0"); + } + } else if (fullCycleUsec != 0.0f) { + // Pattern lost, reset. + if (mNum770 > 5) { + Debug.WriteLine("# lost 770 at " + sampleIndex + " width=" + fullCycleUsec + + " count=" + mNum770); + } + mState = State.ScanFor770Start; + } + break; + case State.ScanForShort0: + // Found what looks like a 770Hz field, find the short 0. + if (halfCycleUsec > SHORT_ZERO_HALF_WIDTH_USEC - SHORT_ZERO_MAX_ERROR_USEC && + halfCycleUsec < SHORT_ZERO_HALF_WIDTH_USEC + SHORT_ZERO_MAX_ERROR_USEC) { + Debug.WriteLine("# found short zero (half=" + halfCycleUsec + ") at " + + sampleIndex + " after " + mNum770 + " 770s"); + mState = State.Short0B; + // Make sure we treat current sample as first half. 
+ mHalfCycleWidthUsec = 0.0f; + } else if (fullCycleUsec != 0.0f && + fullCycleUsec > LEAD_IN_HALF_WIDTH_USEC * 2.0f - LEAD_IN_MAX_ERROR_USEC * 2.0f && + fullCycleUsec < LEAD_IN_HALF_WIDTH_USEC * 2.0f + LEAD_IN_MAX_ERROR_USEC * 2.0f) { + // Still reading 770Hz cycles. + mNum770++; + } else if (fullCycleUsec != 0.0f) { + // Full cycle of the wrong size, we've lost it. + Debug.WriteLine("# lost 770 at " + sampleIndex + " width=" + fullCycleUsec + + " count=" + mNum770); + mState = State.ScanFor770Start; + } + break; + case State.Short0B: + // Pick up the second half of the start cycle. + Debug.Assert(fullCycleUsec != 0.0f); + if (fullCycleUsec > (SHORT_ZERO_HALF_WIDTH_USEC + ZERO_HALF_WIDTH_USEC) - ZERO_MAX_ERROR_USEC * 2.0f && + fullCycleUsec < (SHORT_ZERO_HALF_WIDTH_USEC + ZERO_HALF_WIDTH_USEC) + ZERO_MAX_ERROR_USEC * 2.0f) { + // As expected. + Debug.WriteLine("# found 0B " + halfCycleUsec + " (total " + fullCycleUsec + + "), advancing to read data state"); + mDataStart = sampleIndex; + mState = State.ReadData; + } else { + // Must be a false-positive at end of tone. + Debug.WriteLine("# didn't find post-short-0 value (half=" + + mHalfCycleWidthUsec + " + " + halfCycleUsec + ")"); + mState = State.ScanFor770Start; + } + break; + case State.ReadData: + // Check width of full cycle; don't double error allowance. + if (fullCycleUsec != 0.0f) { + if (fullCycleUsec > ZERO_HALF_WIDTH_USEC * 2 - ZERO_MAX_ERROR_USEC * 2 && + fullCycleUsec < ZERO_HALF_WIDTH_USEC * 2 + ZERO_MAX_ERROR_USEC * 2) { + bitVal = 0; + emitBit = true; + } else + if (fullCycleUsec > ONE_HALF_WIDTH_USEC * 2 - ONE_MAX_ERROR_USEC * 2 && + fullCycleUsec < ONE_HALF_WIDTH_USEC * 2 + ONE_MAX_ERROR_USEC * 2) { + bitVal = 1; + emitBit = true; + } else { + // Bad cycle, assume end reached. 
+ Debug.WriteLine("# bad full cycle time " + fullCycleUsec + + " in data at " + sampleIndex + ", bailing"); + mDataEnd = sampleIndex; + mState = State.EndReached; + } + } + break; + default: + throw new Exception("bad state " + mState); + } + + // Save the half-cycle stats. + if (mHalfCycleWidthUsec == 0.0f) { + mHalfCycleWidthUsec = halfCycleUsec; + } else { + mHalfCycleWidthUsec = 0.0f; + } + return emitBit; + } + } +} diff --git a/CommonUtil/WAVFile.cs b/CommonUtil/WAVFile.cs index e5cd9d8..eb8b787 100644 --- a/CommonUtil/WAVFile.cs +++ b/CommonUtil/WAVFile.cs @@ -15,12 +15,15 @@ */ using System; using System.Diagnostics; +using System.Text; namespace CommonUtil { /// /// This processes a RIFF audio file (.wav). /// /// + /// The current implementation is for PCM WAVE data, which has a fixed size per sample, + /// but the API is intended to support compressed formats as well. /// Thanks: https://stackoverflow.com/q/8754111/294248 , /// http://soundfile.sapp.org/doc/WaveFormat/ , and /// https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/riffmci.pdf . @@ -38,7 +41,7 @@ public class WAVFile { public const int WAVE_FORMAT_ADPCM = 0x0002; public const int WAVE_FORMAT_IEEE_FLOAT = 0x0003; - private const int MIN_LEN = 44; + private const int MIN_LEN = 42; private const int RIFF_HEADER_LEN = 12; private const int CHUNK1_MIN_LEN = 16; private const int CHUNK1_MAX_LEN = 128; // arbitrary @@ -85,24 +88,31 @@ public class WAVFile { /// public int DataLength { get; private set; } - //public int BytesPerSample { - // get { return ((BitsPerSample + 7) / 8) * Channels; } - //} + /// + /// Data stream reference. + /// + private Stream mStream; /// /// Private constructor. /// - private WAVFile() { } + private WAVFile(Stream stream) { + mStream = stream; + } /// /// Parses the WAV file header. Does not process the audio samples. /// + /// + /// The WAVFile object holds a reference to the Stream, but does not take + /// ownership. 
+ /// /// WAV file data stream, positioned at start. /// A new object with properties set, or null on failure. - public static WAVFile? ReadHeader(Stream stream) { - WAVFile wav = new WAVFile(); - if (!wav.ParseHeader(stream)) { + public static WAVFile? Prepare(Stream stream) { + WAVFile wav = new WAVFile(stream); + if (!wav.ParseHeader()) { return null; } if (wav.FormatTag != WAVE_FORMAT_PCM || @@ -118,29 +128,29 @@ private WAVFile() { } /// Parses the headers out of the RIFF file. On return, the stream will be positioned /// at the start of the audio sample data. /// - /// Data stream, positioned at start of RIFF data. + /// Data stream, positioned at start of RIFF data. /// True on success. - private bool ParseHeader(Stream stream) { - if (stream.Length - stream.Position < MIN_LEN) { + private bool ParseHeader() { + if (mStream.Length - mStream.Position < MIN_LEN) { return false; } // Read the RIFF header. byte[] riffHeader = new byte[RIFF_HEADER_LEN]; - stream.ReadExactly(riffHeader, 0, RIFF_HEADER_LEN); + mStream.ReadExactly(riffHeader, 0, RIFF_HEADER_LEN); uint chunkId = RawData.GetU32BE(riffHeader, 0); if (chunkId != SIG_RIFF) { - Debug.WriteLine("Not a RIFF (.wav) file, stream now at " + stream.Position); + Debug.WriteLine("Not a RIFF (.wav) file, stream now at " + mStream.Position); return false; } uint chunkSize = RawData.GetU32LE(riffHeader, 4); // size of everything that follows - if (stream.Length - stream.Position + 4 < chunkSize) { + if (mStream.Length - mStream.Position + 4 < chunkSize) { Debug.WriteLine("WAV file is too short"); return false; } uint chunkFormat = RawData.GetU32BE(riffHeader, 8); if (chunkId != SIG_RIFF || chunkFormat != SIG_WAVE) { - Debug.WriteLine("Incorrect WAVE file header, stream now at " + stream.Position); + Debug.WriteLine("Incorrect WAVE file header, stream now at " + mStream.Position); return false; } @@ -148,15 +158,19 @@ private bool ParseHeader(Stream stream) { // We don't know exactly how large it will be, but it's 
safe to assume that anything // we understand will have a reasonably-sized chunk here. bool ok; - uint subChunk1Id = RawData.ReadU32BE(stream, out ok); - uint subChunk1Size = RawData.ReadU32LE(stream, out ok); + uint subChunk1Id = RawData.ReadU32BE(mStream, out ok); + uint subChunk1Size = RawData.ReadU32LE(mStream, out ok); if (subChunk1Id != SIG_FMT || subChunk1Size < CHUNK1_MIN_LEN || subChunk1Size > CHUNK1_MAX_LEN) { Debug.WriteLine("Bad subchunk1 header"); return false; } + if (subChunk1Size > mStream.Length - mStream.Position) { + Debug.WriteLine("Subchunk1 exceeds file length"); + return false; + } byte[] subChunk1Header = new byte[subChunk1Size]; - stream.ReadExactly(subChunk1Header, 0, (int)subChunk1Size); + mStream.ReadExactly(subChunk1Header, 0, (int)subChunk1Size); // Process the common fields. FormatTag = RawData.GetU16LE(subChunk1Header, 0); @@ -189,6 +203,7 @@ private bool ParseHeader(Stream stream) { Debug.WriteLine("Warning: BlockAlign has unexpected value " + BlockAlign); } } else { + BitsPerSample = -1; Debug.WriteLine("Warning: audio format is not PCM: " + FormatTag); } @@ -196,8 +211,8 @@ private bool ParseHeader(Stream stream) { // scanning until we find it or run out of file. 
uint subChunk2Id, subChunk2Size; while (true) { - subChunk2Id = RawData.ReadU32BE(stream, out ok); - subChunk2Size = RawData.ReadU32LE(stream, out ok); + subChunk2Id = RawData.ReadU32BE(mStream, out ok); + subChunk2Size = RawData.ReadU32LE(mStream, out ok); if (!ok || subChunk2Size == 0) { Debug.WriteLine("Unable to find data chunk"); return false; @@ -207,24 +222,76 @@ private bool ParseHeader(Stream stream) { } Debug.WriteLine("Skipping chunk: '" + RawData.StringifyU32BE(subChunk2Id) + "'"); - stream.Seek(subChunk2Size, SeekOrigin.Current); + mStream.Seek(subChunk2Size, SeekOrigin.Current); } - if (stream.Length - stream.Position < subChunk2Size) { + if (mStream.Length - mStream.Position < subChunk2Size) { Debug.WriteLine("Bad subchunk2size " + subChunk2Size); return false; } + if (FormatTag == WAVE_FORMAT_PCM) { + int bytesPerSample = ((BitsPerSample + 7) / 8) * Channels; + if (subChunk2Size % bytesPerSample != 0) { + // Ignore partial sample data. + Debug.WriteLine("Warning: file ends with a partial sample; len=" + + subChunk2Size); + subChunk2Size -= (uint)(subChunk2Size % bytesPerSample); + } + } + // All done. Stream is positioned at the start of the data. - DataOffset = stream.Position; + DataOffset = mStream.Position; DataLength = (int)subChunk2Size; return true; } + /// + /// Seeks the file stream to the start of the sample area. + /// + public void SeekToStart() { + mStream.Position = DataOffset; + } + + /// + /// Returns a string with a human-readable summary of the file format. 
+ /// + public string GetInfoString() { + StringBuilder sb = new StringBuilder(); + sb.Append("RIFF WAVE, format="); + sb.Append(FormatTag); + if (FormatTag == WAVE_FORMAT_PCM) { + sb.Append(" (PCM)"); + } + switch (Channels) { + case 1: + sb.Append(" mono"); + break; + case 2: + sb.Append(" stereo"); + break; + default: + sb.Append(' '); + sb.Append(Channels); + sb.Append("-channel"); + break; + } + sb.Append(' '); + sb.Append(SamplesPerSec); + sb.Append("Hz"); + if (FormatTag == WAVE_FORMAT_PCM) { + sb.Append(' '); + sb.Append(BitsPerSample); + sb.Append("-bit"); + } + return sb.ToString(); + } + // // WAVE PCM encoding, briefly: // // Samples are stored sequentially, in whole bytes. For sample sizes that aren't a - // multiple of 8, data is stored the most-significant bits. The low bits are set to zero. + // multiple of 8, data is stored in the most-significant bits. The low bits are set + // to zero. // // For bits per sample <= 8, values are stored as unsigned, e.g. 8 bits = [0,255]. // For bits per sample > 8, values are stored as signed, e.g. 16 bits = [-32768,32767]. @@ -236,12 +303,58 @@ private bool ParseHeader(Stream stream) { // 16-bit stereo: (ch0l ch0h ch1l ch1h) ... // - public int ConvertSamplesToReal(Stream stream, int count, float[] outBuf) { - // TODO - // For stereo recordings, just use the left channel, which comes first in - // each sample. - // https://github.com/fadden/ciderpress/blob/fc2fc1429df0a099692d9393d214bd6010062b1a/app/CassetteDialog.cpp#L715 - throw new NotImplementedException(); + private const int READ_BUF_LEN = 65536; // must be a multiple of 4 + private byte[]? mReadBuf = null; + + /// + /// Reads samples from the current stream position, and converts them to floating point + /// values, in the range [-1,1). The method will attempt to fill the entire buffer, + /// but may not be able to do so if the end of the file is reached or the internal + /// read buffer is smaller than the request. 
+ /// + /// + /// This always uses the samples from channel 0, which is the left channel in a stereo + /// recording. + /// + /// Buffer that receives output. + /// Offset to first location in output buffer that will + /// receive data. + /// Number of values stored in the output buffer, or 0 if EOF has been reached. + /// Returns -1 if we can't interpret this WAVE data. + public int GetSamples(float[] outBuf, int outOffset) { + if (FormatTag != WAVE_FORMAT_PCM || BitsPerSample > 16) { + return -1; + } + int bytesPerSample = ((BitsPerSample + 7) / 8) * Channels; + + int desiredNumSamples = outBuf.Length - outOffset; + int bytesRemaining = DataLength - (int)(mStream.Position - DataOffset); + // Never request more than the read buffer can hold, in whole sample frames. + int maxSamples = (bytesPerSample == 0) ? 0 : READ_BUF_LEN / bytesPerSample; + int byteCount = Math.Min(Math.Min(desiredNumSamples, maxSamples) * bytesPerSample, + bytesRemaining); + + if (mReadBuf == null) { + mReadBuf = new byte[READ_BUF_LEN]; + } + mStream.ReadExactly(mReadBuf, 0, byteCount); + int offset = 0, firstOutOffset = outOffset; // remember where output began + + if (BitsPerSample <= 8) { + while (byteCount != 0) { + outBuf[outOffset++] = (mReadBuf[offset] - 128) / 128.0f; + offset += bytesPerSample; + byteCount -= bytesPerSample; + } + } else { + while (byteCount != 0) { + int sample = (short)(mReadBuf[offset] | (mReadBuf[offset + 1] << 8)); // signed LE + outBuf[outOffset++] = sample / 32768.0f; + offset += bytesPerSample; + byteCount -= bytesPerSample; + } + } + return outOffset - firstOutOffset; } } } diff --git a/DiskArc/Arc/AudioRecording-notes.md b/DiskArc/Arc/AudioRecording-notes.md index 71f9532..b03214d 100644 --- a/DiskArc/Arc/AudioRecording-notes.md +++ b/DiskArc/Arc/AudioRecording-notes.md @@ -2,8 +2,12 @@ ## Primary References ## - - General info: https://retrocomputing.stackexchange.com/a/144/56 - Apple II ROM tape routines (READ, WRITE) + - CiderPress implementation + (https://github.com/fadden/ciderpress/blob/master/app/CassetteDialog.cpp) + +There is a substantial library of Apple II cassette tapes at +https://brutaldeluxe.fr/projects/cassettes/.
## Background ## @@ -21,8 +25,8 @@ structure: - Tape-in edge: 1/2 cycle at 400 usec/cycle, followed by 1/2 cycle at 500 usec/cycle. This "short zero" indicates the transition between header and data. - - Data: one cycle per bit, using 500 usec/cycle for 0 and - 1000 usec/cycle for 1. + - Data: one cycle per bit, using 500 usec/cycle (2kHz) for 0 and + 1000 usec/cycle (1kHz) for 1. There is no "end of data" indication, so it's up to the user to specify the length of data. The last byte of data is followed by an XOR checksum, @@ -122,8 +126,91 @@ a full cycle before making an evaluation, though we still need to examine the half-cycle timing during the lead-in to catch the "short 0". Because of these distortions, 8-bit 8KHz audio is probably not a good -idea. 16-bit 22.05KHz sampling is a better choice for tapes that have -been sitting around for 25-30 years. +idea. 22.05KHz sampling is a better choice for tapes that have been sitting +around for 25-30 years. + +## Sound Capture ## + +_Here are some tips on capturing audio from an Apple II data recording. This +comes from the manual for the original CiderPress application._ + +It isn't necessary to record each section of data from the cassette into its +own WAV file. CiderPress will try to find every chunk of data in a WAV file. + +If you have a "line out" on your tape player and a "line in" on your PC sound +card, use those. If not, you can use the "microphone" input and the +"headphone" out, though you will have to set the volume levels correctly. In +the "speaker" or "multimedia" control panel, set the microphone input gain to +50%. Start up your sound editor. If you have a way to see the input level +on the microphone, turn that on. Play the cassette tape out loud until you +hear a tone, then plug it into the computer and watch the input level. 
You +want to set the volume so that the input is as high as you can get it without +exceeding the limit (this causes "clipping", which is a lot like a square +wave but probably isn't going to help us here). + +Once you have the volume level figured out, back the tape up to the start of +the tone. Hit "record" in your software and "play" on your tape player. +Record at 22.05KHz with 8-bit monaural samples. (Recording at CD quality, +44.1KHz with 16-bit stereo samples, doesn't help and requires 8x the space.) +If your software shows an input meter while recording, continue to record +until the volume level drops and stays low for at least 10 seconds. If you +can't monitor the input, you will either need to time the cassette, or just +record for a long time and perhaps trim the excess off in the sound editor. +Make sure you get all of the data from the tape. When you think you're done, +pull the audio plug out of the tape player and keep listening for a +little bit. + +Tip: CiderPress only needs to see about a second of the lead-in, so it's okay +to fiddle with the volume while the initial tone is playing. + +Tip: in some cases, setting the volume a little too high can be beneficial. +It's better to clip some samples than have too little signal. If at first +you don't succeed, crank up the volume a notch and try again. + + +Most cassettes include more than one copy of a program. In some cases (such +as Adventure International's "Asteroid") they are slightly different +implementations, while in others it's the same program repeated. Sometimes +the program is repeated on the back side of the tape. Magnetic tapes wear out +if you play them too much, so redundancy was common. + +The output of the Apple II is a blocky "square wave" rather than a smooth +"sine wave". Because of limitations in how quickly voltage levels can change, +the output isn't perfectly square. 
Because of the physical properties of and +variations in magnetic media, the not-quite-square wave is rather rounded and +wiggly. After being stored in less-than-perfect conditions for 25-30 years, +what you read back from an Apple II tape can look pretty crazy. + +Most cassettes can be recovered, even those that will no longer play on an +Apple II. If you find one that can't, you may want to keep the WAV recording +anyway, on the off chance that in the future an improved algorithm can be +developed that will decode it. + +When CiderPress encounters data that it can't interpret, it stops trying to +read from that section of the WAV file. For this reason, damaged entries will +usually be shorter than undamaged ones. If a file appears to have the correct +length but the checksum still doesn't match, it means the signal was +sufficiently distorted to make a '0' bit look like a '1' bit, which is actually +pretty hard to do. In most cases the decoder will either make an accurate +determination or will conclude that the signal is too distorted to process. +So far only one case has been found where the checksum was deliberately +altered, as part of a copy protection scheme (Sargon II). + +If the tape has more than one program on it, you can usually tell if it's +multiple copies of the same thing by comparing lengths and checksums. If the +checksums say "good" but have different values, you probably have two +different programs, or two slightly different versions of the same program. + + +You may be tempted to store copies of the WAV file in MP3 format. This is not +recommended. CiderPress cannot decode MP3s, and the decoded MP3 file is less +likely to work than the original. However, experiments with converting the +sound files in and out of MP3 format suggest that "healthy" files are unharmed +at reasonable compression ratios. + +You don't need fancy equipment. 
Connecting the headphone jack of a 15-year-old +"boom box" to the microphone jack of a low-cost PC with on-motherboard audio +works just fine. ## ROM Implementation ## diff --git a/DiskArc/Arc/AudioRecording.cs b/DiskArc/Arc/AudioRecording.cs index 993ba2f..2b2a997 100644 --- a/DiskArc/Arc/AudioRecording.cs +++ b/DiskArc/Arc/AudioRecording.cs @@ -16,7 +16,6 @@ using System; using System.Collections; using System.Diagnostics; -using System.Text; using CommonUtil; using static DiskArc.Defs; @@ -98,7 +97,7 @@ IEnumerator IEnumerable.GetEnumerator() { /// True if this looks like a match. public static bool TestKind(Stream stream, AppHook appHook) { stream.Position = 0; - WAVFile? wav = WAVFile.ReadHeader(stream); + WAVFile? wav = WAVFile.Prepare(stream); return (wav != null); } @@ -122,12 +121,14 @@ private AudioRecording(Stream? stream, WAVFile wavFile, AppHook appHook) { /// Data stream is not compatible. public static AudioRecording OpenArchive(Stream stream, AppHook appHook) { stream.Position = 0; - WAVFile? wav = WAVFile.ReadHeader(stream); + WAVFile? wav = WAVFile.Prepare(stream); if (wav == null) { - throw new NotSupportedException("Not a recognized WAV file format"); + throw new NotSupportedException("Not a supported WAV file format"); } AudioRecording archive = new AudioRecording(stream, wav, appHook); - // TODO - scan contents + List chunks = CassetteDecoder.DecodeFile(wav, + CassetteDecoder.Algorithm.Zero); + // TODO - analyze contents return archive; } diff --git a/DiskArc/Arc/AudioRecording_FileEntry.cs b/DiskArc/Arc/AudioRecording_FileEntry.cs index d65e195..3ac7f9a 100644 --- a/DiskArc/Arc/AudioRecording_FileEntry.cs +++ b/DiskArc/Arc/AudioRecording_FileEntry.cs @@ -82,7 +82,7 @@ public byte Access { public DateTime CreateWhen { get => TimeStamp.NO_DATE; set { } } public DateTime ModWhen { get => TimeStamp.NO_DATE; set { } } - public long StorageSize => 1234; // TODO, report WAV size? 
+ public long StorageSize => 1234; // TODO public long DataLength => 1234; // TODO