-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsoundcloud-liberator.html
341 lines (329 loc) · 23.3 KB
/
soundcloud-liberator.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
<!DOCTYPE html>
<!--
(The MIT License)
Copyright (c) 2016 Kura
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the 'Software'), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-->
<html lang="en">
<head id="head">
  <!-- Document metadata. The theme's stylesheets and script are loaded at the
       end of <body>, so only encoding, viewport, resource hints, the favicon
       and the title live here. -->
  <meta charset="utf-8">
  <meta name="viewport"
        content="width=device-width, initial-scale=1.0, minimum-scale=1.0">
  <link rel="alternate" href="https://nekopy.github.io" hreflang="en">
  <!-- DNS hints use explicit https: URLs so they still resolve when the page
       is opened from a non-http(s) origin such as file://. -->
  <link rel="dns-prefetch" href="https://nekopy.github.io">
  <link rel="dns-prefetch" href="https://code.getmdl.io">
  <link rel="shortcut icon" href="https://nekopy.github.io/favicon.ico">
  <title>Soundcloud Liberator</title>
</head>
<body>
<div class="eevee-layout mdl-layout mdl-js-layout mdl-color--grey-50">
<header class="eevee-header mdl-color--grey-50 mdl-color-text--grey-800"
        itemscope itemtype="http://schema.org/WPHeader">
  <!-- id="top" is the target of the footer's "Back to the top of the page" link. -->
  <div class="mdl-layout__header-row" id="top">
    <!-- Drawer toggle shown on small screens only. It is an icon-only control,
         so aria-label supplies its accessible name and the icon itself is
         hidden from assistive technology. -->
    <div aria-expanded="false" role="button" tabindex="0"
         aria-label="Navigation menu"
         class="mdl-layout__drawer-button mdl-color-text--accent eevee-mobile-button mdl-layout--small-screen-only">
      <i class="material-icons" aria-hidden="true"></i>
    </div>
    <span class="eevee-logo mdl-color-text--accent mdl-layout-title">
      <h2>
        <a href="https://nekopy.github.io" rel="bookmark"
           title="stderr">
          stderr
        </a>
      </h2>
    </span>
    <div class="mdl-layout-spacer" role="presentation"></div>
    <!-- Header links shown on large screens only; the drawer carries the
         small-screen equivalent. -->
    <nav class="eevee-nav mdl-navigation mdl-layout--large-screen-only"
         itemscope itemtype="http://schema.org/SiteNavigationElement"
         aria-label="Header navigation">
      <a class="mdl-color-text--accent mdl-navigation__link"
         href="https://github.com/nekopy" itemprop="url" rel="bookmark">
        Profile
      </a>
    </nav>
  </div>
</header>
<!-- Navigation drawer (mdl-layout__drawer). It repeats the header's Profile
     link and adds a Home link. The markup ships with aria-hidden="true". -->
<div class="eevee-mobile-header mdl-layout__drawer mdl-color--white"
aria-hidden="true">
<!-- Drawer title: plain-text site name, not a link. -->
<span class="mdl-layout-title">
<h2 class="eevee-mobile-logo mdl-color-text--accent">
stderr
</h2>
</span>
<div class="mdl-navigation mdl-color--white">
<!-- Drawer link list; carries the same aria-label as the header nav. -->
<nav class="eevee-nav mdl-navigation" itemscope
itemtype="http://schema.org/SiteNavigationElement"
aria-label="Header navigation">
<a class="mdl-color-text--accent mdl-navigation__link"
href="https://nekopy.github.io" itemprop="url" rel="bookmark">
Home
</a>
<a class="mdl-color-text--accent mdl-navigation__link"
href="https://github.com/nekopy" itemprop="url" rel="bookmark">
Profile
</a>
</nav>
</div>
</div>
<div class="eevee-ribbon mdl-color--primary-dark" role="presentation">
</div>
<main class="eevee-main mdl-layout__content">
<div class="eevee-container mdl-grid">
<div role="presentation"
class="mdl-cell mdl-cell--2-col mdl-cell--hide-tablet mdl-cell--hide-phone">
</div>
<div class="eevee-content mdl-color--white mdl-shadow--4dp mdl-color-text--grey-800 mdl-cell mdl-cell--8-col"
aria-label="Main content">
<article itemscope itemtype="http://schema.org/BlogPosting">
<!-- schema.org accessibility metadata for the BlogPosting.
     accessibilityControl lists input methods that can fully operate the page. -->
<meta itemprop="accessibilityControl" content="fullKeyboardControl">
<meta itemprop="accessibilityControl" content="fullMouseControl">
<!-- The values below describe content features, not input methods, so they
     are published as accessibilityFeature rather than accessibilityControl. -->
<meta itemprop="accessibilityFeature" content="bookmarks">
<meta itemprop="accessibilityFeature" content="captions">
<meta itemprop="accessibilityFeature" content="alternativeText">
<meta itemprop="accessibilityFeature" content="index">
<meta itemprop="accessibilityFeature" content="readingOrder">
<meta itemprop="accessibilityFeature" content="structuralNavigation">
<meta itemprop="accessibilityFeature" content="tableOfContents">
<meta itemprop="accessibilityHazard" content="noFlashingHazard">
<meta itemprop="accessibilityHazard" content="noMotionSimulationHazard">
<meta itemprop="accessibilityHazard" content="noSoundHazard">
<meta itemprop="accessibilityAPI" content="ARIA">
<!-- Author, exposed as microdata only (the elements carry the "hidden" class). -->
<div itemprop="author" itemscope
     itemtype="https://schema.org/Person" role="presentation">
  <a href="https://nekopy.github.io/author/nekopy.html" class="hidden"
     itemprop="url" role="presentation">
    <span class="hidden" itemprop="name" role="presentation">
      neko.py
    </span>
  </a>
</div>
<!-- First entry holds the tag list, the second the category name. -->
<meta itemprop="keywords"
      content="soundcloud liberator,browser extension,javascript,mp3,soundcloud,m3u,firefox">
<meta itemprop="keywords" content="Web Analysis">
<div class="eevee-meta eevee-share">
<div class="mdl-layout-spacer"></div>
<div>
<!-- Share links. Each link is icon-only, so its title attribute supplies the
     accessible name. Ampersands inside href values are written as &amp;. -->
<ul class="social-share mdl-navigation">
  <li class="social-share__link social-share__link--twitter">
    <a href="https://twitter.com/intent/tweet?text=Soundcloud%20Liberator&amp;url=https%3A//nekopy.github.io/soundcloud-liberator.html"
       title="Share 'Soundcloud Liberator' on Twitter"
       onclick="window.open(this.href, 'twitter-share', 'width=550,height=235'); return false;">
      <i class="fa fa-twitter" aria-hidden="true"></i>
    </a>
  </li>
  <li class="social-share__link social-share__link--facebook">
    <a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//nekopy.github.io/soundcloud-liberator.html"
       title="Share 'Soundcloud Liberator' on Facebook"
       onclick="window.open(this.href, 'facebook-share', 'width=580,height=296'); return false;">
      <i class="fa fa-facebook" aria-hidden="true"></i>
    </a>
  </li>
  <li class="social-share__link social-share__link--google-plus">
    <a href="https://plus.google.com/share?url=https%3A//nekopy.github.io/soundcloud-liberator.html"
       title="Share 'Soundcloud Liberator' on Google+"
       onclick="window.open(this.href, 'google-plus-share', 'width=490,height=530');return false;">
      <i class="fa fa-google" aria-hidden="true"></i>
    </a>
  </li>
  <li class="social-share__link social-share__link--email">
    <!-- The href previously ran on into the tail of the title text
         ("' via email"), which put that text into the message body and left
         the link without a title. The two attributes are now separate. -->
    <a href="mailto:?subject=Soundcloud%20Liberator&amp;body=Soundcloud%20Liberator%20-%20https%3A//nekopy.github.io/soundcloud-liberator.html%0A%0AI%20created%20a%20WebExtension%20that%20lets%20you%20download%20any%20song%20off%20soundcloud.%20The%20download%20is%20available%20here%2C%20along%20with%20source%2C%20and%20an%20explanation%20of%20how%20I%20developed%20it%2C%20using%20an%20unlisted%20API%20command%20and%20the%20soundcloud%20API."
       title="Share 'Soundcloud Liberator' via email">
      <i class="material-icons" aria-hidden="true"></i>
    </a>
  </li>
</ul>
</div>
</div>
<div class="eevee-article">
<div class="eevee-meta mdl-color-text--grey-500">
<time datetime="2017-06-10T00:30:00-07:00"
itemprop="datePublished">
Sat 10 June 2017
</time>
</div>
<h1 itemprop="name">
<a href="https://nekopy.github.io/soundcloud-liberator.html" rel="bookmark"
title="Permalink to 'Soundcloud Liberator'"
itemprop="url">
Soundcloud Liberator
</a>
</h1>
<section itemprop="articleBody" class="article-content">
<p>Hi folks. It's been quite a while since I last wrote a blog post, huh? Well, I've been busy with various projects at home and at work, so cut me some slack. To make it up to you, I've got a treat. <a href="https://nekopy.github.io/assets/plugins/soundcloud_liberator-1.0-an+fx.xpi">The Soundcloud Liberator browser extension for Firefox</a> </p>
<p>The source is <a href="https://github.com/nekopy/soundcloud-liberator">available here</a></p>
<h1>Usage & Rationale</h1>
<p>This Firefox browser plugin allows you to download any track off of soundcloud, regardless of whether the download link is available. The usage is simple. You install the browser plugin, then visit any sound page on soundcloud (as in, the page for a single song). Once you're there, click the new FREE icon in your browser, and it'll open a tab that lets you download the track by saving it with your browser. It doesn't support downloading playlists right now. This browser plugin arose out of my frustration with soundcloud tracks that are uploaded under a Creative Commons license, but had no download link. If a track is Creative Commons, it's meant to be shared, right? I feel like by restricting download of tracks like that, it's against the spirit of the license. This was particularly annoying when I was specifically looking for CC content to use in other projects, and I'd find something great, only for there to be no download link. What's even more annoying is that unless you have some kind of pro account on soundcloud, downloads will be capped after a certain number, so even if a user intended for a song to be universally available, the download might be disabled just because the track had too many downloads already.</p>
<h1>Development</h1>
<p>At first, I had assumed that I'd be able to just look at the network traffic and grab my MP3 and be done with it. In fact, I vaguely remember being able to do this in the past. So I pulled up the network inspector in firefox, filtered it to media files and hit play. What I found was something else entirely. </p>
<p><img alt="Several Network Requests" src="https://nekopy.github.io/assets/images/network-mess.png"></p>
<p>It seems that for some reason, soundcloud splits a file into segments. There's a couple different reasons this might be. For one, this might allow us to do parallelized download. Additionally, this might allow load balancing, or letting really really hot songs be spread across servers. If you picked any one of these URLs out, you could pull it up in a tab, but it'd only be a segment of the song. Let's take a look at one of the actual MP3 urls:</p>
<div class="highlight"><pre><span></span>https://cf-hls-media.sndcdn.com/media/3352449/3512109/hbsslqcaoE4T.128.mp3?Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiKjovL2NmLWhscy1tZWRpYS5zbmRjZG4uY29tL21lZGlhLyovKi9oYnNzbHFjYW9FNFQuMTI4Lm1wMyIsIkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTQ5NjQ3NzYwN319fV19&Signature=h8BFZrBB1CHlEss~JTosvMS7alBB7awLVg-O64B~jljrCLdm1wERXceivavohPn42gzDdS9tCCmRovoPma5y6GZ0WqW5QaXUEQAfi-EYSMhtVuIyGgZTZ4h45GweXCpBxtGmzTILao1qj5Xe9n3OgWplYQFeVNFSTOXoxsa8gkRMf~LUddB9qPbQVxMBS8E1cZHQT5bADaYab8mXaRTkNfJAWB6dt7fqynZVKXkqK5Gsa7CoRon08JG2GtvkJt4EXLiOUESgGZtbFe6Ev6eLC0xz6vqu-Vw5DWyax~pMMe2QDqYGvrvWPx3AgH4-0zn2fpeuG2QCkLyrm6V5TjlXzQ__&Key-Pair-Id=APKAJAGZ7VMH2PFPW6UQ
</pre></div>
<p>Wow that is <em>ugly</em>. Tons of huge CGI nonsense makes this a fairly intimidating URL to look at. However, I quickly noticed a pattern.</p>
<div class="highlight"><pre><span></span>https://cf-hls-media.sndcdn.com/media/3033128/3192788/hbsslqcaoE4T.128.mp3?Po...
https://cf-hls-media.sndcdn.com/media/3192789/3352448/hbsslqcaoE4T.128.mp3?P...
https://cf-hls-media.sndcdn.com/media/3352449/3512109/hbsslqcaoE4T.128.mp3?Polic...
</pre></div>
<p>After the media path, there's two numbers, and the number before is always less than the number after, and across URLs in the network log, they're monotonically increasing. Aha! So these aren't some random numbers. They are in fact some kind of units of time. Maybe seconds or samples. So what would happen if we found the URL with the highest ending sample, and replaced its start sample with 0? Like so...</p>
<div class="highlight"><pre><span></span>https://cf-hls-media.sndcdn.com/media/0/3512109/hbsslqcaoE4T.128.mp3?Polic...
</pre></div>
<p>Well I tried it, and lo and behold, we got ourselves the full track, start to finish. What must be going on here is that the server will somehow split up the tracks on request, and serve it back. This doesn't rule out load balancing going on. There are probably replicas with all the data around. Or it could just be some mechanism to stop people from just grabbing the track outright. This totally sucks though. If I want to download a track, I gotta pull up my network view, seek to the end of the track to trigger the last song segment to download, and then patch the URL up. At this point, I decided it would be a fun exercise to instead make myself a browser plugin to do it for me.</p>
<p>The first step was to identify how to make a browser plugin. I'm a firefox user, so I immediately went to the Firefox reference, and found <a href="https://developer.mozilla.org/en-US/Add-ons/WebExtensions/Intercept_HTTP_requests">this tutorial</a> which covers how to hook HTTP requests and investigate their content. From there, I was able to build a listener for the browser.webRequest.onCompleted event, which I use in my final product. </p>
<p>The initial idea was to grab each of these ones, find the highest, and associate it with a normal track ID, so I could make sure the request belonged to the page I was visiting. I must admit, I never looked into whether there are separate instances of my listener running for each tab or not, and how the script behaved when the URL you're visiting changes, so instead I use an associative map of track IDs (actually it's the mp3 filename, but I'll get into that later) to download links, and when you click on the button, it looks up the track ID in the map to see if the download link is available. The code itself to accomplish this is mostly string manipulation. It isn't particularly interesting. However, in order to index into that map, I need to get the track ID from the current URL. To do this, I need a soundcloud API token. However, the browser accomplishes these calls, and certainly it doesn't have an API token, right? </p>
<p>Well, in fact you can just take the token that the browser is using and substitute it in for a developer token. To facilitate this, I use my getClientId function to pull it out of the CGI parameters. Very simple.</p>
<p>An issue quickly manifested itself, in that in order for me to get the highest sample number, the browser needs to make a request for it first. This was lousy, because it meant that in my initial version, the user needed to click on the end of the track in order to trigger the download. However, I found that the full list of audio segments is available in the form of an M3U playlist file that's downloaded by the browser when you visit a page. What's even better is that M3U is a text-based format. You'll see that in my final code, there are no longer any hooks to listen for the HTTP requests for individual songs. Instead, I simply listen for the M3U and parse out the last track, which, being in chronological order, will always have the highest sample in it.</p>
<p>However, there's one last issue. When we read our M3U file and get the MP3's, we have no way of getting the track ID from the MP3 url, since it has no info like that in it. So we must save the files into our map just by the filename. But there's no way exposed in the public API to go either from an MP3 filename to a track ID or vice-versa. I seemed to be SOL after quite a bit of work... However, after reading through the HTTP requests that are made, I found an interesting API call:</p>
<div class="highlight"><pre><span></span>/tracks/{trackID}/streams
</pre></div>
<p>This appeared to do exactly what I wanted it to: resolve the URL to the actual mp3 name. However, to request that MP3 file, we still need some magical CGI tokens on the end, so I wasn't able to just use this for anything. However, that could be a possibility if I were able to cache the policy token stuff at the end of the files. In fact, using the policy token stuff did work. Apparently it's valid long enough for me to reuse it later. I don't know how long it takes to expire, and eventually it will likely leave my cache stale, for which the liberator currently has no resolution. You have to just refresh the page. </p>
<p>This magic sauce is used in my resolveStreamInfo function which is used in the process of going from a track ID to a URL, which allows me to index into my map.</p>
<p>The rest of the code is fairly straightforward. We just wait for the user to click the button, and if they're on a soundcloud song page we try a lookup in our map, and spawn a tab with the full-length mp3 in it.</p>
<p>That's the long and short of it. I've used the plugin several times now, and it's integrated into my full browser. Hope you enjoy.</p>
</section>
</div>
</article>
</div>
</div>
</main>
<div class="eevee-pagination__container eevee-container mdl-grid">
<div role="presentation"
class="mdl-cell mdl-cell--2-col mdl-cell--hide-tablet mdl-cell--hide-phone">
</div>
<div class="mdl-color-text--grey-800 mdl-cell mdl-cell--8-col">
<nav class="eevee-pagination mdl-cell mdl-cell--12-col" itemscope
itemtype="http://schema.org/SiteNavigationElement"
aria-label="Pagination">
<div class="eevee-spacer"></div>
</nav> </div>
</div>
<footer class="mdl-mega-footer" itemscope
itemtype="http://schema.org/SiteNavigationElement">
<div class="mdl-mega-footer--top-section">
<div class="mdl-mega-footer--drop-down-section">
<ul class="mdl-mega-footer--link-list">
<li>
<a href="#top" itemprop="url"
title="Back to the top of the page">
Back to the top of the page
</a>
</li>
</ul>
</div>
</div>
<div class="mdl-mega-footer--middle-section" itemscope
itemtype="http://schema.org/SiteNavigationElement"
aria-label="Footer navigation">
<div class="mdl-mega-footer--drop-down-section">
<h1 class="mdl-mega-footer--heading">Menu</h1>
<ul class="mdl-mega-footer--link-list">
<li>
<a href="https://github.com/nekopy" itemprop="url" rel="bookmark">
Profile
</a>
</li>
</ul>
</div>
<div class="mdl-mega-footer--drop-down-section">
<h1 class="mdl-mega-footer--heading">Categories</h1>
<ul class="mdl-mega-footer--link-list">
<li>
<a href="https://nekopy.github.io/category/linux.html" itemprop="url"
rel="bookmark">
linux
</a>
</li>
<li>
<a href="https://nekopy.github.io/category/misc.html" itemprop="url"
rel="bookmark">
misc
</a>
</li>
<li>
<a href="https://nekopy.github.io/category/web-analysis.html" itemprop="url"
rel="bookmark">
Web Analysis
</a>
</li>
</ul>
</div>
<div class="mdl-mega-footer--drop-down-section">
<h1 class="mdl-mega-footer--heading">Social</h1>
<ul class="mdl-mega-footer--link-list">
<li>
<a href="https://discord.gg/Baka" itemprop="url" rel="bookmark">
Bakabot Discord
</a>
</li>
</ul>
</div>
<div class="mdl-mega-footer--drop-down-section">
<h1 class="mdl-mega-footer--heading">Links</h1>
<ul class="mdl-mega-footer--link-list">
<li>
<a href="http://getpelican.com/" itemprop="url" rel="bookmark">
Pelican
</a>
</li>
<li>
<a href="https://www.eff.org/" itemprop="url" rel="bookmark">
EFF
</a>
</li>
<li>
<a href="https://fsf.org/" itemprop="url" rel="bookmark">
The Free Software Foundation
</a>
</li>
<li>
<a href="https://creativecommons.org/" itemprop="url" rel="bookmark">
Creative Commons
</a>
</li>
</ul>
</div>
</div> <div class="mdl-mega-footer--bottom-section">
<div class="mdl-logo">
<a href="https://nekopy.github.io" rel="bookmark" itemprop="url"
title="stderr">
stderr
</a>
</div>
<!-- Footer credits: tagline, theme attribution and an embedded third-party frame. -->
<ul class="eevee-footer mdl-mega-footer--link-list">
  <li>Powered by love &amp; rainbow sparkles.</li>
  <li><a href="https://kura.io/eevee/" title="Eevee">Eevee</a> theme by <a href="https://kura.io/" title="kura.io">kura.io</a></li>
  <li>
    <!-- title gives the frame an accessible name; frameborder/scrolling are
         kept so the frame renders exactly as before. -->
    <iframe src="https://keroserene.net/snowflake/embed.html" title="Snowflake embed" width="88" height="16" frameborder="0" scrolling="no"></iframe>
  </li>
</ul>
</div>
</footer>
</div>
<!-- Theme assets. Stylesheet order is significant (later sheets override
     earlier ones), so it is kept exactly as before. The CDN URL uses an
     explicit https: scheme so it also loads when the page is opened from a
     non-http(s) origin such as file://. -->
<link rel="stylesheet"
      href="https://code.getmdl.io/1.1.3/material.blue_grey-indigo.min.css">
<link rel="stylesheet"
      href="https://nekopy.github.io/theme/css/font-awesome.css">
<link rel="stylesheet"
      href="https://nekopy.github.io/theme/css/material-icons.css">
<link rel="stylesheet"
      href="https://nekopy.github.io/theme/css/pygments.css">
<link rel="stylesheet"
      href="https://nekopy.github.io/theme/css/eevee.css">
<link rel="stylesheet"
      href="https://nekopy.github.io/theme/css/custom.css">
<!-- Theme script, loaded asynchronously. -->
<script async src="https://nekopy.github.io/theme/js/material.js">
</script>
</body>
</html>