hybrid security analysis of web javascript code via ... · hybrid security analysis of web...
TRANSCRIPT
hybrid security analysis of web JavaScript code via dynamic
partial evaluation
Omer Tripp, Pietro Ferrara, Marco Pistoia
IBM Research, NY
1
Work published at the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA 2014) — Recipient of the ACM SIGSOFT Distinguished Paper Award
web client-side code*
5%
25% 30%
0%
8%
15%
23%
30%
38%
9 years ago 4 years ago today
* data due to IBM application security research team
2
client-side vulnerabilities*
DOM-based XSS
var pos = document.location.href.indexOf("name=");document.write(document.URL.substring(pos, document.URL.length));
open redirect
var pos = document.location.href.indexOf("target=");var val = document.location.href.substring(pos);
document.location.href = "http://" + val;
>15% vulnerable to these attacks!
* data due to IBM application security research team
3
reflected XSS
attacker !
web app
script reflected into HTML response without proper encoding
attacker’s evil script executed using victim’s credentials
link embedded with evil script
victim !
4!
DOM-based XSS
attacker !
NO reflection into HTML response
evil script NOT sent to server
victim !
www.ibm.com/index.html?name=<script>…</script>
ibm.com !
5
client-side vulnerabilities*
DOM-based XSS
var pos = document.location.href.indexOf("name=");document.write(document.URL.substring(pos, document.URL.length));
open redirect
var pos = document.location.href.indexOf("target=");var val = document.location.href.substring(pos);
document.location.href = "http://" + val;
>15% vulnerable to these attacks!
* data due to IBM application security research team
6
open redirect
attacker !
ibm.com !
victim !
attacker.com !
www.ibm.com/index.html?target=attacker.com
7
client-side vulnerabilities*
DOM-based XSS
var pos = document.location.href.indexOf("name=");document.write(document.URL.substring(pos, document.URL.length));
open redirect
var pos = document.location.href.indexOf("target=");var val = document.location.href.substring(pos);
document.location.href = "http://" + val;
>15% vulnerable to these attacks!
* data due to IBM application security research team
8
JavaScript complexities
9!
JavaScript complexities
eval and its relatives
eval("document.write('evil')");
10!
JavaScript complexities
reflective property access
var a = "foo" + "bar";
var b = obj[a];
11!
JavaScript complexities
arguments array
bar() { if (arguments.length > 3) foo(arguments[2]); }
bar(1, ”x”, 3)
12!
JavaScript complexities
prototype-chain property lookup
function F() { this.f = document.location; }
function G() { }
G.prototype = new F(); var g = new G(); write(g.bar);
13!
JavaScript complexities
function pointers
var m = function() { … }
var k = function(f) { f(); }
k(m);
14!
JavaScript complexities
lexical scoping
function foo() {
var y = 42; var bar = function() { write(y); } }
15!
JavaScript complexities
…!
16!
motivating example
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
17
motivating example
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
18
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
19
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
20
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
21
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
22
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
23
taint analysis
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
(real-world JavaScript code from the Alcatel-Lucent website)!
BOOM?!
24
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = str.substring(0, url_check);
result = result + ‘login.jsp’ +
str.substring(url_check +
search_term.length), str.length);
document.url = result; // sink
}
dynamic partial evaluation
(real-world JavaScript code from the Alcatel-Lucent website)!
25
dynamic partial evaluation
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = “http://x.com/“;
result= “http://x.com/login.jsp?p1=v1”;
document.url = result;
}
(real-world JavaScript code from the Alcatel-Lucent website)!
document.location;
str.substring(0, url_check);result + ‘login.jsp’ +
str.substring(url_check +
search_term.length),str.length);
26
dynamic partial evaluation
var search_term = ‘login.html’;
var str = “http://x.com/login.html?p1=v1”;
var url_check = str.indexOf(search_term);
if (url_check > -1) {
var result = “http://x.com/“;
result= “http://x.com/login.jsp?p1=v1”;
document.url = result;
}
(real-world JavaScript code from the Alcatel-Lucent website)!
27
our hybrid approach
var search_term = ‘login.html’;var str = document.url; // sourcevar url_check = str.indexOf(search_term);…
GET http://x.com/login.html?p1=v1 !
var search_term = ‘login.html’;var str = “http://x.com/login.html?p1=v1”;var url_check = str.indexOf(search_term);…
var search_term = ‘login.html’;var str = document.url; // sourcevar url_check = str.indexOf(search_term);…
http://x.com/login.html?p1=v1 !
28
Dynamic Oracle
• Crawls Web site
• Collects dynamic information
• Links references to the DOM with partially concretized values
Traditional Static Taint Analysis
• Looks for flows from sources to sinks
Static String Analysis
• Determines which parts of a string are beyond user control
• Leverages string information for better classification of findings
our hybrid approach
var search_term = ‘login.html’;var str = document.url; // sourcevar url_check = str.indexOf(search_term);…
GET http://x.com/login.html?p1=v1 !
var search_term = ‘login.html’;var str = “http://x.com/login.html?p1=v1”;var url_check = str.indexOf(search_term);…
accuracy
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1 !
29
Dynamic Oracle
• Crawls Web site
• Collects dynamic information
• Links references to the DOM with partially concretized values
Traditional Static Taint Analysis
• Looks for flows from sources to sinks
Static String Analysis
• Determines which parts of a string are beyond user control
• Leverages string information for better classification of findings
our hybrid approach
var search_term = ‘login.html’;var str = document.url; // sourcevar url_check = str.indexOf(search_term);…
GET http://x.com/login.html?p1=v1 !
var search_term = ‘login.html’;var str = “http://x.com/login.html?p1=v1”;var url_check = str.indexOf(search_term);…
coverage
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1 !
30
Dynamic Oracle
• Crawls Web site
• Collects dynamic information
• Links references to the DOM with partially concretized values
Traditional Static Taint Analysis
• Looks for flows from sources to sinks
Static String Analysis
• Determines which parts of a string are beyond user control
• Leverages string information for better classification of findings
our hybrid approach
var search_term = ‘login.html’;var str = document.url; // sourcevar url_check = str.indexOf(search_term);…
GET http://x.com/login.html?p1=v1 !
var search_term = ‘login.html’;var str = “http://x.com/login.html?p1=v1”;var url_check = str.indexOf(search_term);…
lightweight
var search_term = ‘login.html’;
var str = document.url; // source
var url_check = str.indexOf(search_term);
…
http://x.com/login.html?p1=v1 !
31
Dynamic Oracle
• Crawls Web site
• Collects dynamic information
• Links references to the DOM with partially concretized values
Traditional Static Taint Analysis
• Looks for flows from sources to sinks
Static String Analysis
• Determines which parts of a string are beyond user control
• Leverages string information for better classification of findings
http://x.com/login.html?p1=v1
document.location
static analysis: JSA — intuition
http://x.com/login.html? .*
system-controlled prefix
attacker-controlled suffix
32
static analysis: JSA — domain
var str = document.location;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
33
static analysis: JSA — domain
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
34
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
static analysis: JSA — domain
Prx Idx
{str},HTTP://X.Com/login.html?,T
35
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
static analysis: JSA — domain
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
36
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
static analysis: JSA — domain
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T {n: 13}
37
static analysis: JSA — domain
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T {n: 13}
{str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {tmp},HTTP://X.Com/,F {n: 13}
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
38
static analysis: JSA — domain
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T {n: 13}
{str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {tmp},HTTP://X.Com/,F {n: 13}
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
39
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
static analysis: JSA — transformers
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
40
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
static analysis: JSA — transformers
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T {n: 13}
41
var str = “HTTP://X.Com/login.html?p1=v1”;
var lstr = str.toLowerCase();
var n = lstr.indexOf(“login.html”);
if (n > -1) {
var tmp = str.substring(0,n);
document.location.href = tmp;
}
static analysis: JSA — transformers
42
Prx Idx
{str},HTTP://X.Com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T
{str},HTTP://X.Com/login.html?,T{lstr},http://x.com/login.html?,T {n: 13}
{str},HTTP://X.Com/login.html?,T {lstr},http://x.com/login.html?,T {tmp},HTTP://X.Com/,F {n: 13}
JSA written atop the WALA framework
IFDS problem / distributive analysis
optimization: staged analysis
1st stage: taint analysis
2nd stage: JSA applied to source/sink pairs not eliminated by taint analysis
featured in IBM Security AppScan Standard Edition (AppScan Std) V8.6
note on implementation
43
170,000 webpages / 675 websites:
Fortune 500
top 100 (www.web100.com)
handpicked security and IT websites
up to 500 pages per site via nonintrusive crawling
no login
only link crawling
evaluation — benchmarks
44
evaluation — benchmarks
var pageUrl = window.location;
var cId = document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value;
var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl;
openPopupWindow(url);
45
var url = window.location.href;
var i = url.indexOf("?");
if (i>0) {url=url.substring(0, i); i = url.indexOf(“/ntopic/");}
if (i<0) { return; }
url = url.substring(0, i+1);
url = url+"livehelp/?pluginID="+a;
window.location.href = url;
(from the Corning website)!
(from the IBM Team Concert website)!
evaluation — benchmarks
var pageUrl = window.location;
var cId = document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value;
var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl;
openPopupWindow(url);
46
var url = window.location.href;
var i = url.indexOf("?");
if (i>0) {url=url.substring(0, i); i = url.indexOf(“/ntopic/");}
if (i<0) { return; }
url = url.substring(0, i+1);
url = url+"livehelp/?pluginID="+a;
window.location.href = url;
(from the Corning website)!
(from the IBM Team Concert website)!
Both cId and pageURL flow into the target URL’s query string, after the ‘?’
var url = window.location.href;
var i = url.indexOf("?");
if (i>0) {url=url.substring(0, i); i = url.indexOf(“/ntopic/");}
if (i<0) { return; }
url = url.substring(0, i+1);
url = url+"livehelp/?pluginID="+a;
window.location.href = url;
evaluation — benchmarks
var pageUrl = window.location;
var cId = document.getElementById("ctl00_ContentPlaceHolder1_hdnContentId").value;
var url = "/CMS/OverviewPrint.aspx?id=" + cid + "&url=" + pageUrl;
openPopupWindow(url);
(from the Corning website)!
(from the IBM Team Concert website)!
47
Both cId and pageURL flow into the target URL’s query string, after the ‘?’
Computes a prefix of the URL string that lies within the host path, and appends the constant string livehelp/?pluginID=
1st experiment: comparison with taint analysis
compared against commercial taint analysis (used in AppScan Std V8.5)
entire set of webpages
manual classification of results by professional ethical hacker (from IBM application security team): TP/FP
48
1st experiment: results
2,639 2,639 301
4,443
0
2000
4000
6000
8000
JSA taint analysis
TPs FPs
49
90% reduction!!
function changeZipRedirect(zipCodeRedirect) {
var currURL = document.location.href; ...;
wcmContext = currURL.split(’WCM GLOBAL CONTEXT’);
var redirectStr = wcmContext[1]; ...;
if (redirectStr .match(”pmapmc=”) == null) {
/∗ redirect to the zipcode page ∗/
document.location.href = zipCodeRedirect + ”&redirectURL”
+ redirectStr; } }
evaluation — FPs?
(from the Alltel website)!
50
function changeZipRedirect(zipCodeRedirect) {
var currURL = document.location.href; ...;
wcmContext = currURL.split(’WCM GLOBAL CONTEXT’);
var redirectStr = wcmContext[1]; ...;
if (redirectStr .match(”pmapmc=”) == null) {
/∗ redirect to the zipcode page ∗/
document.location.href = zipCodeRedirect + ”&redirectURL”
+ redirectStr; } }
evaluation — FPs?
(from the Alltel website)!
51
function changeZipRedirect(zipCodeRedirect) {
var currURL = document.location.href; ...;
wcmContext = currURL.split(’WCM GLOBAL CONTEXT’);
var redirectStr = wcmContext[1]; ...;
if (redirectStr .match(”pmapmc=”) == null) {
/∗ redirect to the zipcode page ∗/
document.location.href = zipCodeRedirect + ”&redirectURL”
+ redirectStr; } }
evaluation — FPs?
(from the Alltel website)!
52
unresolved constant !
2nd experiment: comparison with black-box testing
compared against commercial testing engine (that of AppScan Std V8.6)
~10% (60 / 675) of websites sampled at random
website fragment retrieved by crawler deployed locally for intrusive testing
53
2nd experiment: results
configuration | vulnerable websites | false positives
JSA enabled | 33 | 4
JSA disabled | 8 | 0
54
conclusion
JavaScript security: a BIG deal
taint analysis: poor user experience (>60% FPs!)
the key: dynamic partial evaluation
high coverage
precision boost with low overhead
JSA: novel form of string analysis
scalable (staged solution atop taint analysis)
90% reduction in FPs!
55
<script>alert(‘thank you!’)</script>
56