|
From: <mor...@us...> - 2008-10-15 02:05:31
|
Revision: 5379
http://personalrobots.svn.sourceforge.net/personalrobots/?rev=5379&view=rev
Author: morgan_quigley
Date: 2008-10-15 02:05:21 +0000 (Wed, 15 Oct 2008)
Log Message:
-----------
some starter audio utilities to grab an audio stream and chop it up into segments separated by silence
Added Paths:
-----------
pkg/trunk/audio/
pkg/trunk/audio/audio_capture/
pkg/trunk/audio/audio_capture/CMakeLists.txt
pkg/trunk/audio/audio_capture/Makefile
pkg/trunk/audio/audio_capture/audio_capture.cpp
pkg/trunk/audio/audio_capture/audio_clip_writer.cpp
pkg/trunk/audio/audio_capture/manifest.xml
Added: pkg/trunk/audio/audio_capture/CMakeLists.txt
===================================================================
--- pkg/trunk/audio/audio_capture/CMakeLists.txt (rev 0)
+++ pkg/trunk/audio/audio_capture/CMakeLists.txt 2008-10-15 02:05:21 UTC (rev 5379)
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 2.6)
+include(rosbuild)
+rospack(audio_capture)
+
+rospack_add_executable(audio_capture audio_capture.cpp)
+target_link_libraries(audio_capture portaudio)
+
+rospack_add_executable(audio_clip_writer audio_clip_writer.cpp)
+target_link_libraries(audio_clip_writer sndfile)
+
Added: pkg/trunk/audio/audio_capture/Makefile
===================================================================
--- pkg/trunk/audio/audio_capture/Makefile (rev 0)
+++ pkg/trunk/audio/audio_capture/Makefile 2008-10-15 02:05:21 UTC (rev 5379)
@@ -0,0 +1 @@
+include $(shell rospack find mk)/cmake.mk
Added: pkg/trunk/audio/audio_capture/audio_capture.cpp
===================================================================
--- pkg/trunk/audio/audio_capture/audio_capture.cpp (rev 0)
+++ pkg/trunk/audio/audio_capture/audio_capture.cpp 2008-10-15 02:05:21 UTC (rev 5379)
@@ -0,0 +1,117 @@
+///////////////////////////////////////////////////////////////////////////////
+// A simple audio grabber which uses PortAudio to put an audio stream into ROS
+//
+// Copyright (C) 2008, Morgan Quigley
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of Stanford University nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//////////////////////////////////////////////////////////////////////////////
+
+#include <cstdio>
+#include <portaudio.h>
+#include "ros/node.h"
+#include "robot_msgs/AudioRawStream.h"
+
+const static int SAMPLE_RATE = 44100; // todo: make this a parameter.
+ros::node *g_audio_node = NULL;
+bool g_caught_sigint = false;
+
+#define SHOW_MAX_SAMPLE
+//#define GRATUITOUS_DEBUGING
+
+static int audio_cb(const void *input, void *output,
+ unsigned long frame_count,
+ const PaStreamCallbackTimeInfo *time_info,
+ PaStreamCallbackFlags status, void *user_data)
+{
+ static robot_msgs::AudioRawStream audio_msg;
+ audio_msg.num_channels = 1;
+ audio_msg.sample_rate = SAMPLE_RATE;
+#ifdef GRATUITOUS_DEBUGGING
+ static ros::Time prev_t;
+ ros::Time t(ros::Time::now());
+ ROS_DEBUG("got %lu samples, dt = %.3f\n", frame_count, (t - prev_t).to_double());
+ prev_t = t;
+#endif
+#ifdef SHOW_MAX_SAMPLE
+ float max = 0;
+#endif
+ audio_msg.samples.resize(frame_count);
+ for (uint32_t i = 0; i < frame_count; i++)
+ {
+ float val = ((float *)input)[i];
+ audio_msg.samples[i] = val;
+#ifdef SHOW_MAX_SAMPLE
+ if (val > max)
+ max = val;
+#endif
+ }
+#ifdef SHOW_MAX_SAMPLE
+ printf("%.3f\r", max);
+ fflush(stdout);
+#endif
+ g_audio_node->publish("audio", audio_msg);
+ return paContinue;
+}
+
+void sig_handler(int sig)
+{
+ g_caught_sigint = true;
+ ROS_DEBUG("caught sigint, init shutdown sequence...");
+}
+
+int main(int argc, char **argv)
+{
+ PaStream *stream;
+ PaError err;
+ ROS_DEBUG("initializing portaudio");
+ if (Pa_Initialize() != paNoError)
+ ROS_FATAL("unable to initialize portaudio");
+ ROS_DEBUG("opening default audio stream");
+ ros::init(argc, argv);
+ ros::node n("audio_capture", ros::node::DONT_HANDLE_SIGINT);
+ g_audio_node = &n;
+ n.advertise<robot_msgs::AudioRawStream>("audio", 5);
+ err = Pa_OpenDefaultStream(&stream, 1, 0, paFloat32, SAMPLE_RATE, 4096,
+ audio_cb, NULL);
+ if (err != paNoError)
+ ROS_FATAL("unable to open audio stream\n");
+ ROS_INFO("starting audio stream, ctrl-c to stop.");
+#ifdef SHOW_MAX_SAMPLE
+ ROS_INFO("the printout stream shows the maximum audio sample for each portaudio snippet; use this to sanity-check your audio configuration and to set the gains inside alsamixer and on your audio hardware. You want the peaks to be near 1.0 but not quite 1.0 -- digital audio doesn't saturate very nicely.");
+#endif
+ if (Pa_StartStream(stream) != paNoError)
+ ROS_FATAL("unable to start audio stream");
+ signal(SIGINT, sig_handler);
+ while (!g_caught_sigint)
+ usleep(500*1000);
+ ROS_DEBUG("stopping audio stream");
+ if (Pa_StopStream(stream) != paNoError)
+ ROS_FATAL("unable to stop audio stream");
+ ROS_DEBUG("shutting down portaudio");
+ if (Pa_Terminate() != paNoError)
+ ROS_FATAL("unable to close portaudio");
+ ros::fini();
+ return 0;
+}
+
Added: pkg/trunk/audio/audio_capture/audio_clip_writer.cpp
===================================================================
--- pkg/trunk/audio/audio_capture/audio_clip_writer.cpp (rev 0)
+++ pkg/trunk/audio/audio_capture/audio_clip_writer.cpp 2008-10-15 02:05:21 UTC (rev 5379)
@@ -0,0 +1,172 @@
+///////////////////////////////////////////////////////////////////////////////
+// This program watches a ROS audio stream and writes its clips into files
+//
+// Copyright (C) 2008, Morgan Quigley
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of Stanford University nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//////////////////////////////////////////////////////////////////////////////
+
+#include <cstdio>
+#include <deque>
+#include <vector>
+#include "ros/node.h"
+#include "robot_msgs/AudioRawStream.h"
+#include <sndfile.h>
+
+using std::deque;
+using std::vector;
+
+static const float CLIP_START_POWER = 0.05f;
+static const float CLIP_MAINTAIN_POWER = 0.03f;
+static const float WINDOW_LENGTH = 1.0; // seconds
+static const float MIN_CLIP_LENGTH = 2.0; // seconds
+
+class AudioWriter
+{
+public:
+ AudioWriter(ros::node *n)
+ : clip_num(0), clip_start(0), clip_end(0), audio_clock(0),
+ clip_state(IDLE), window_power(0)
+ {
+ n->subscribe("audio", audio_msg, &AudioWriter::audio_cb, this, 5);
+ }
+ void audio_cb()
+ {
+ if (audio_msg.num_channels != 1)
+ ROS_FATAL("audio_clip_writer can only handle single-channel audio.");
+ for (size_t i = 0; i < audio_msg.samples.size(); i++)
+ {
+ audio_clock++;
+ // maintain a window of 0.5 seconds
+ if (window.size() >= WINDOW_LENGTH * audio_msg.sample_rate)
+ window.pop_front();
+ window.push_back(audio_msg.samples[i]);
+ if (audio_clock % 100 == 0)
+ {
+ // compute RMS power
+ float sum_squares = 0;
+ int win_idx = 0;
+ for (deque<float>::iterator j = window.begin(); j != window.end();
+ ++j, win_idx++)
+ sum_squares += (*j) * (*j);// * ((float)win_idx / window.size());
+ //window_power = sqrt(2 * sum_squares / window.size());
+ window_power = sqrt(sum_squares / window.size());
+ //printf("%f\n", window_power);
+ }
+ switch (clip_state)
+ {
+ case IDLE:
+ if (fabs(audio_msg.samples[i]) > CLIP_START_POWER)
+ {
+ printf("clip start\n");
+ clip_start = audio_clock;
+ clip_state = CLIP_START;
+ clip.clear();
+ clip.reserve(window.size());
+ for (deque<float>::iterator j = window.begin();
+ j != window.end(); ++j)
+ clip.push_back(*j);
+ }
+ break;
+ case CLIP_START:
+ clip.push_back(audio_msg.samples[i]);
+ if (audio_clock - clip_start < audio_msg.sample_rate * WINDOW_LENGTH)
+ {
+ if (window_power > CLIP_MAINTAIN_POWER)
+ clip_state = IN_CLIP;
+ }
+ else
+ {
+ clip_state = IDLE;
+ printf("audio power never ramped up; it's not a real clip.\n");
+ }
+ break;
+ case IN_CLIP:
+ clip.push_back(audio_msg.samples[i]);
+ if (window_power < CLIP_MAINTAIN_POWER)
+ {
+ clip_state = IDLE;
+ const float clip_len = (float)clip.size() / audio_msg.sample_rate;
+ if (clip_len >= MIN_CLIP_LENGTH)
+ {
+ printf("good clip, length = %.2f seconds\n", clip_len);
+ // normalize it
+ float max_amp = 0, mean = 0;
+ for (vector<float>::iterator j = clip.begin();
+ j != clip.end(); ++j)
+ mean += *j;
+ mean /= clip.size();
+ for (vector<float>::iterator j = clip.begin();
+ j != clip.end(); ++j)
+ {
+ *j -= mean;
+ if (fabs(*j) > max_amp)
+ max_amp = fabs(*j);
+ }
+ for (vector<float>::iterator j = clip.begin();
+ j != clip.end(); ++j)
+ *j /= max_amp * 1.05;
+ SF_INFO sf_info;
+ sf_info.samplerate = audio_msg.sample_rate;
+ sf_info.channels = 1;
+ sf_info.format = SF_FORMAT_AU | SF_FORMAT_FLOAT;
+ char fnamebuf[100];
+ snprintf(fnamebuf, sizeof(fnamebuf), "clip%06d.au", clip_num++);
+ SNDFILE *sf = sf_open(fnamebuf, SFM_WRITE, &sf_info);
+ ROS_ASSERT(sf);
+ sf_write_float(sf, &clip[0], clip.size());
+ sf_close(sf);
+ //char cmd[500];
+ //system(cmd);
+ printf("done\n");
+ }
+ else
+ printf("too short. length = %.2f seconds\n", clip_len);
+ }
+ break;
+ }
+ }
+ }
+private:
+ int32_t clip_num, clip_start, clip_end;
+ uint64_t audio_clock;
+ enum { IDLE, CLIP_START, IN_CLIP } clip_state;
+ float window_power;
+ robot_msgs::AudioRawStream audio_msg;
+ deque<float> window;
+ vector<float> clip;
+};
+
+int main(int argc, char **argv)
+{
+ ros::init(argc, argv);
+ ros::node n("audio_clip_writer");
+ AudioWriter *aw = new AudioWriter(&n);
+ while (n.ok())
+ ros::Duration(0, 500000000).sleep();
+ ros::fini();
+ delete aw;
+ return 0;
+}
+
Added: pkg/trunk/audio/audio_capture/manifest.xml
===================================================================
--- pkg/trunk/audio/audio_capture/manifest.xml (rev 0)
+++ pkg/trunk/audio/audio_capture/manifest.xml 2008-10-15 02:05:21 UTC (rev 5379)
@@ -0,0 +1,9 @@
+<package>
+ <description>This package grabs a live audio stream from PortAudio and ships it across the ROS botnet. The program grabs the "default" audio stream. I can't speak for other distros, but on Ubuntu the default stream is settable via the GNOME "System->Preferences->Sound" box. I set the "Sound Capture" field to "ALSA" and use "alsamixer" on the command line to poke around until I get the microphone jack working. Each motherboard seems to be a little different in how the various jacks come up in ALSA.</description>
+ <author>Morgan Quigley</author>
+ <license>BSD</license>
+ <depend package="roscpp"/>
+ <depend package="robot_msgs"/>
+ <sysdepend os="ubuntu" version="8.04-hardy" package="portaudio19-dev"/>
+ <sysdepend os="ubuntu" version="8.04-hardy" package="libsndfile-dev"/>
+</package>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|