-
-
Save avimar/d2e9d05e082ce273962d742eb9acac16 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# pcap2wav
# Original Author: Michael Collins <[email protected]>
# Standard disclaimer: batteries not included, your mileage may vary...
# Updated by Avi Marcus <[email protected]>
#
# Accepts arg of pcap file w/only 2 RTP streams
# Creates a .<codec> file and a .wav file
# PCMU, PCMA and GSM are converted with sox; G722 and G729 are converted
# with fs_encode.
# Current codec support: g711a/u, GSM, G722, G729

# Show the help text and stop when help is requested explicitly or when the
# script is started without any argument.
if [[ $1 == "-h" || $1 == "-help" || $1 == "--help" || $1 == "" ]]; then
  # Quoted delimiter: the help text is literal, nothing in it is expanded.
  cat <<'EOF'
pcap2wav is a simple utility to make it easier to extract the audio from a pcap
Dependencies:
apt-get install -y tshark sox
yum install wireshark sox
Usage:
pcap2wav [opts] filename.pcap [target filename]
Script attempts to create a few files: a .<codec> file and a .wav file for each RTP stream
It requires Tshark and Sox to be installed on the system. If G722 or G729
is used then the script will attempt to use fs_encode to decode and create a wav.
Supported codecs:
PCMU (G711 ulaw)
PCMA (G711 Alaw)
GSM
G722 (requires fs_encode)
G729 (requires fs_encode with mod_com_g729)
Supported options:
-z Perform "clean and zip" - After converting to wav files the program will "clean up"
by putting the wav files into a .tgz file and then removing
the .wav and .<codec> files from the disk.
EOF
  exit 0
fi
# Split the command line into the clean-up flag, the capture file and the
# output prefix. A leading -z shifts the two positional values by one.
case "$1" in
  -z)
    CLEAN=true
    CAPFILE=$2
    TARGETFILE=$3
    ;;
  *)
    CLEAN=false
    CAPFILE=$1
    TARGETFILE=$2
    ;;
esac
# NOTE(review): LOGDIR is not referenced anywhere else in the visible script;
# kept in case something outside this file relies on it.
LOGDIR=/var/log

# Resolve the external tools once. `command -v` is the portable lookup; an
# empty value means the tool is not on PATH.
TSHARK=$(command -v tshark)
SOX=$(command -v sox)
# fs_encode is optional here: it is only used for G722 / G729 captures.
FSENCODE=$(command -v fs_encode)

# tshark and sox are mandatory; stop with a failure status so callers can
# tell the conversion did not happen.
if [[ -z "$TSHARK" ]]; then
  echo "Tshark not found. Please install Tshark and then re-run this script" >&2
  exit 1
fi
if [[ -z "$SOX" ]]; then
  echo "Sox not found. Please install Sox and then re-run this script" >&2
  exit 1
fi
# Make sure pcap exists. The path is quoted so that an empty value (for
# example when only -z was given) or a name containing spaces is tested as
# one path instead of silently passing the check.
if [[ -f "$CAPFILE" ]]; then
  echo "Found $CAPFILE, working..."
else
  echo "$CAPFILE not found, exiting." >&2
  exit 1
fi

# Set target file names; default is "pcap2wav.<codec>" and "pcap2wav.wav"
# under /tmp. TARGETFILE is used as a prefix for every generated file.
if [[ -z "$TARGETFILE" ]]; then
  TARGETFILE="/tmp/pcap2wav"
else
  echo "Using $TARGETFILE"
fi
echo "Checking $CAPFILE for RTP streams..."

# Read "ssrc,port" records (one per line) from stdin and publish them as:
#   num_streams - number of records read
#   streams[]   - the raw records, 0-indexed
#   ssrc[]      - first field of each record, 1-indexed
#   port[]      - second field of each record, 1-indexed
# Blank lines are ignored.
load_rtp_streams() {
  local record stream_ssrc stream_port _
  streams=()
  ssrc=()
  port=()
  num_streams=0
  while IFS= read -r record || [[ -n "$record" ]]; do
    [[ -n "$record" ]] || continue
    IFS=, read -r stream_ssrc stream_port _ <<<"$record"
    streams+=("$record")
    num_streams=$((num_streams + 1))
    ssrc[$num_streams]=$stream_ssrc
    port[$num_streams]=$stream_port
  done
}

# Locate RTP streams: one unique "ssrc,destination port" pair per stream.
# The list is piped straight into the parser, so no scratch file is needed.
load_rtp_streams < <(
  tshark -n -r "$CAPFILE" -Y rtp -T fields -e rtp.ssrc -e udp.dstport -Eseparator=, \
    | sort -u
)
# The payload type of the first RTP packet in the capture decides the codec
# used for every stream. grep -E with a bracket expression is used because
# grep -P is not available in every grep implementation.
payload_type=$(tshark -n -r "$CAPFILE" -T fields -e rtp.p_type | grep -E '[0-9]+' | head -n 1)

# Map the payload type to a codec name (also the raw file extension) and to
# the command prefix that turns the raw file into a wav.
codec=''
decoder=''
case "$payload_type" in
  0)
    codec='PCMU'
    decoder="$SOX -t ul -r 8000 -c 1"
    ;;
  3)
    codec='GSM'
    decoder="$SOX -t gsm -r 8000 -c 1"
    ;;
  8)
    codec='PCMA'
    decoder="$SOX -t al -r 8000 -c 1"
    ;;
  9)
    codec='G722'
    decoder="$FSENCODE"
    ;;
  18)
    codec='G729'
    decoder="$FSENCODE -l mod_com_g729"
    ;;
esac

if [[ -z "$codec" ]]; then
  echo "Unable to determine codec from payload type: $payload_type" >&2
  exit 1
fi

# G722 and G729 cannot be decoded by sox. Without fs_encode the conversion
# command would start with the input file name, so stop here instead.
if [[ ("$codec" == 'G722' || "$codec" == 'G729') && -z "$FSENCODE" ]]; then
  echo "fs_encode not found. It is required to decode $codec; please install it and then re-run this script" >&2
  exit 1
fi

# convert[N] is the complete conversion command line for stream N (1-indexed),
# stored as a single string.
for ((item = 1; item <= num_streams; item++)); do
  convert[$item]="$decoder ${TARGETFILE}_$item.$codec ${TARGETFILE}_$item.wav"
done
# Turn hex dump lines read from stdin into the raw bytes they describe.
# Every non-hex character is dropped first, so both "55:55:55" and "555555"
# are accepted. A trailing unpaired hex digit on a line is ignored.
hex_to_binary() {
  local line hex escaped i
  while IFS= read -r line || [[ -n "$line" ]]; do
    hex=${line//[^[:xdigit:]]/}
    escaped=''
    for ((i = 0; i + 1 < ${#hex}; i += 2)); do
      escaped+="\\x${hex:i:2}"
    done
    # One write per input line; %b expands the \xHH escapes.
    printf '%b' "$escaped"
  done
}

echo "Target files to create:"
for ((item = 1; item <= num_streams; item++)); do
  echo "${TARGETFILE}_$item.$codec and ${TARGETFILE}_$item.wav"
done
echo
for ((item = 1; item <= num_streams; item++)); do
  echo "Stream $item ssrc / port: ${ssrc[$item]} / ${port[$item]}"
done
echo
for ((item = 1; item <= num_streams; item++)); do
  echo "Extracting payloads $item from ${ssrc[$item]}..."
  # The raw codec file is truncated on every run (">"), so running the script
  # again with the same target does not append to stale audio.
  tshark -n -r "$CAPFILE" -Y "rtp.ssrc == ${ssrc[$item]}" -T fields -e rtp.payload 2> /dev/null \
    | hex_to_binary > "${TARGETFILE}_$item.$codec"
  # convert[] holds the conversion command as one plain string, so it is
  # expanded unquoted on purpose to split it into command and arguments.
  ${convert[$item]}
done
# If two streams then assume they're a pair and combine them nicely
if [[ "$num_streams" == "2" ]]; then
  echo "Combining 2 streams into a single wav file for convenience"
  # Find shorter recording, calc time diff in samples
  samples1=$(soxi -s "${TARGETFILE}_1.wav")
  samples2=$(soxi -s "${TARGETFILE}_2.wav")
  # Only mix when both sample counts are plain numbers; otherwise a wav is
  # missing or unreadable and sox would be given a bogus pad value.
  if [[ "$samples1" =~ ^[0-9]+$ && "$samples2" =~ ^[0-9]+$ ]]; then
    if ((samples1 > samples2)); then
      longer="${TARGETFILE}_1.wav"
      shorter="${TARGETFILE}_2.wav"
      delay=$((samples1 - samples2))
    else
      longer="${TARGETFILE}_2.wav"
      shorter="${TARGETFILE}_1.wav"
      delay=$((samples2 - samples1))
    fi
    # The "s" suffix tells sox the pad length is a sample count.
    pad="${delay}s"
    # Prepend silence to the shorter recording so both have the same length.
    "$SOX" "$shorter" "${TARGETFILE}_tmp.wav" pad "$pad" 0s
    # Create "combined" file from the padded recording and the longer one
    "$SOX" -m "${TARGETFILE}_tmp.wav" "$longer" "${TARGETFILE}_mixed.wav"
    rm -f -- "${TARGETFILE}_tmp.wav"
  else
    echo "Could not read the sample counts of ${TARGETFILE}_1.wav and ${TARGETFILE}_2.wav - skipping the combined file" >&2
  fi
fi
# Optional "clean and zip" step: archive the wav files, then remove the
# per-stream .wav and .<codec> files.
if [[ "$CLEAN" == "true" ]]; then
  echo "Clean option"
  ZIPFILE="${TARGETFILE}.tgz"
  rm -f -- "$ZIPFILE"
  # tar is looked up on PATH because it does not live in /bin on every
  # system. The per-stream files are deleted only after the archive was
  # written, so a failing tar never costs the converted audio.
  if tar czf "$ZIPFILE" "${TARGETFILE}"*wav > /dev/null 2>&1; then
    for ((item = 1; item <= num_streams; item++)); do
      rm -f -- "${TARGETFILE}_$item".*
    done
  else
    echo "Could not create $ZIPFILE - leaving .<codec> and .wav files on system." >&2
  fi
  rm -f -- "$TARGETFILE.tmp"
else
  echo "No clean option specified - leaving .<codec> and .wav files on system."
fi
echo
echo "Operation complete"
echo
Can you explain this line?
payload_type=`tshark -n -r $CAPFILE -T fields -e rtp.p_type | grep -P '\d+' | head -n 1`
my versions of grep don't seem to have a -P flag.
Can you explain this line?
payload_type=`tshark -n -r $CAPFILE -T fields -e rtp.p_type | grep -P '\d+' | head -n 1`
my versions of grep don't seem to have a -P flag.
I didn't write this line, not sure what it does. Some googling showed this:
-P, --perl-regexp
Interpret PATTERN as a Perl regular expression. This is highly experimental and grep -P may warn of unimplemented features.
Hi avimar,
I just used your script to convert a G711u file, but it was unsuccessful.
#tctoa@plats1:~$ ./pcap2wav -z test.pcap test1.wav
Found test.pcap, working...
Using test1.wav
Checking test.pcap for RTP streams...
Running as user "tctoa" and group "cvas".
Running as user "tctoa" and group "cvas".
I am not sure why, can you help me?
Hi Avimar
I've used the script and found it very helpful.
Any chance you could assist me in associating the incoming caller & receiving caller to the file names?
I know all the data is typically inside the PCAP, but I'm struggling to connect the data between SIP, SDP, RTP.
Thanks in advance!
@ImThatOneDolphin https://gist.github.com/avimar/d2e9d05e082ce273962d742eb9acac16#gistcomment-3501811
Hi Avimar
I've used the script and found it very helpful.
Any chance you could assist me in associating the incoming caller & receiving caller to the file names?
I know all the data is typically inside the PCAP, but I'm struggling to connect the data between SIP, SDP, RTP.
Thanks in advance!
I've never tried parsing the PCAP for that information. When using pcapsipdump I've had it encode something into the file name, e.g. the sip call id. I do matching based on that. That's probably simpler than parsing the file.
some time ago I wrote a similar script which does look for associated RTP streams (forward and reversed direction) and mixes them into one single file
sp4rkie/pcap2audio: CLI tool to extract conversation audio from pcap files
maybe that could help you
is there any way to export wav file starting after "200 connect"?
Can you explain this line?
payload_type=`tshark -n -r $CAPFILE -T fields -e rtp.p_type | grep -P '\d+' | head -n 1`
my versions of grep don't seem to have a -P flag.
@craigbruenderman try replacing `-P` with `-E`. That worked for me on macOS. You are basically telling `grep` to use extended regular expressions to search for a pattern, in this case the pattern being one or more digits.
Is this script massively broken for anyone else? Ubuntu has no package for fs_encode, but the script's error handling happily plows forward regardless, leading to the output files being painful, high-pitched ringing noises at full volume.
Is this script massively broken for anyone else? Ubuntu has no package for fs_encode, but the script's error handling happily plows forward regardless, leading to the output files being painful, high-pitched ringing noises at full volume.
Interesting, there's no check here that `fs_encode` exists.
It's only for G722 and G729 (requires licenses for g729).
It's installed via the `freeswitch` package, see the latest install instructions for debian 10: https://freeswitch.org/confluence/display/FREESWITCH/Debian+10+Buster
Hi avimar,
i tried your script and i got an error here -
/usr/bin/sox FAIL formats: can't open input file `outputofpcap_3.PCMA': No such file or directory
$ ./pcap2wav -z test.pcap outputofpcap.wav
Found test.pcap, working...
Using outputofpcap.wav
Checking test.pcap for RTP streams...
Target files to create:
outputofpcap.wav_1.PCMA and outputofpcap.wav_1.wav
outputofpcap.wav_2.PCMA and outputofpcap.wav_2.wav
outputofpcap.wav_3.PCMA and outputofpcap.wav_3.wav
Stream 1 ssrc / port: 0x37968baa / 40392
Stream 2 ssrc / port: 0x3796cb71 / 40392
Stream 3 ssrc / port: 0xaaaaaaaa / 40392
Extracting payloads 1 from 0x37968baa...
Extracting payloads 2 from 0x3796cb71...
Extracting payloads 3 from 0xaaaaaaaa...
/usr/bin/sox FAIL formats: can't open input file `outputofpcap.wav_3.PCMA': No such file or directory
Clean option
Operation complete
Pls, check.
All 3 tshark commands in the script need an additional option in order to work for me with raw RTP pcap file without conversation:
-o rtp.heuristic_rtp:TRUE
If this can help anyone... I had some problems with this part:
tshark -n -r $CAPFILE -Y "rtp.ssrc == ${ssrc[$item]}" -T fields -e rtp.payload > /tmp/pcap2wav.payloads${item} 2> /dev/null
for payload in `cat /tmp/pcap2wav.payloads${item}`;do IFS=:;for byte in $payload; do printf "\\x$byte" >> ${TARGETFILE}_$item.$codec; done; done
unset IFS
Because tshark returns the output of rtp.payload as a hex string that is not separated by colons (like "555555" instead of "55:55:55").
To fix this, I replaced the above snippet by this one using xxd:
tshark -n -r $CAPFILE -Y "rtp.ssrc == ${ssrc[$item]}" -T fields -e rtp.payload 2> /dev/null | xxd -r -p > ${TARGETFILE}_$item.$codec
Here is my version of tshark:
$ tshark --version
TShark (Wireshark) 3.2.3 (Git v3.2.3 packaged as 3.2.3-1)
Has anyone else noticed this issue?
One more thing, where can I retrieve fs_encode for g729?
Any idea how to change this script in order for it to take into consideration silence suppression? When any of the streams has silence suppression both streams are unsynchronized after that.
Great script. It would be more useful if you put each RTP stream into a left/right stereo mix in the wav.