Reject HTTP URLs when loading link previews

This commit is contained in:
Evan Hahn 2020-10-01 17:17:35 -05:00 committed by Josh Perez
parent c57f7f1cdb
commit 6e1a83ae4e
3 changed files with 273 additions and 23 deletions

View File

@ -10,6 +10,12 @@ import {
MIMEType,
} from '../types/MIME';
// Upper bound on how many requests `fetchWithRedirects` will issue for a single load
// (the initial request plus every redirect hop) before it gives up.
const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20;
// The status codes that are treated as redirects. Lifted from the `fetch` spec [here][0].
// [0]: https://fetch.spec.whatwg.org/#redirect-status
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;
// Though we'll accept HTML of any Content-Length (including no specified length), we
@ -59,6 +65,68 @@ type ParsedContentType =
| { type: null; charset: null }
| { type: MIMEType; charset: null | string };
/**
 * Fetches `href`, following HTTP redirects by hand so that every hop can be vetted.
 *
 * Every request is issued with `redirect: 'manual'` (overriding any `redirect` value in
 * `options`). A response whose status is not in `REDIRECT_STATUSES` is returned as-is.
 * For a redirect status, the `Location` header is resolved against the URL that was
 * just requested, and the result must use the `https:` protocol.
 *
 * The thrown errors are deliberately unhelpful because (1) a warning is logged right
 * before each throw and (2) the error will be immediately caught.
 *
 * @param fetchFn The fetch implementation used for every hop.
 * @param href The first URL to request. It is passed to `fetchFn` exactly as given.
 * @param options Request options forwarded to every hop.
 * @returns The first response whose status is not a redirect status.
 * @throws Error on a redirect loop, on a redirect without a `Location` header, on a
 *   `Location` that cannot be parsed or is not HTTPS, or when
 *   `MAX_REQUEST_COUNT_WITH_REDIRECTS` requests were made without a final response.
 */
async function fetchWithRedirects(
  fetchFn: FetchFn,
  href: string,
  options: RequestInit
): Promise<Response> {
  const urlsSeen = new Set<string>();

  let nextHrefToLoad = href;
  // Loop detection compares normalized URLs. Redirect targets are already normalized
  // (serialized by `URL`, hash removed), so the caller-supplied `href` is normalized the
  // same way here; otherwise a redirect back to a differently spelled start URL (for
  // example `https://example.com` vs. `https://example.com/`) would go unnoticed for one
  // extra request. If `href` cannot be parsed, it is tracked verbatim.
  let nextHrefKey = maybeParseUrl(href, href)?.href ?? href;

  for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) {
    if (urlsSeen.has(nextHrefKey)) {
      window.log.warn('fetchWithRedirects: found a redirect loop');
      throw new Error('redirect loop');
    }
    urlsSeen.add(nextHrefKey);

    // This `await` is deliberately inside of a loop: each request depends on the
    // previous response's `Location` header.
    // eslint-disable-next-line no-await-in-loop
    const response = await fetchFn(nextHrefToLoad, {
      ...options,
      redirect: 'manual',
    });

    if (!REDIRECT_STATUSES.has(response.status)) {
      return response;
    }

    const location = response.headers.get('location');
    if (!location) {
      window.log.warn(
        'fetchWithRedirects: got a redirect status code but no Location header; bailing'
      );
      throw new Error('no location with redirect');
    }

    // `Location` may be relative, so resolve it against the URL we just requested.
    const newUrl = maybeParseUrl(location, nextHrefToLoad);
    // Rejects both unparseable locations (`newUrl` is null) and non-HTTPS targets.
    if (newUrl?.protocol !== 'https:') {
      window.log.warn(
        'fetchWithRedirects: got a redirect status code and an invalid Location header'
      );
      throw new Error('invalid location');
    }

    nextHrefToLoad = newUrl.href;
    nextHrefKey = newUrl.href;
  }

  window.log.warn('fetchWithRedirects: too many redirects');
  throw new Error('too many redirects');
}
/**
 * Resolves `href` against `base` and returns the resulting URL with its hash removed.
 *
 * @param href An absolute or relative URL string.
 * @param base The URL string that a relative `href` is resolved against.
 * @returns The parsed URL (hash always empty), or `null` if the `URL` constructor
 *   rejects the input.
 */
function maybeParseUrl(href: string, base: string): null | URL {
  try {
    const parsed = new URL(href, base);
    // We never need the hash
    parsed.hash = '';
    return parsed;
  } catch (err) {
    return null;
  }
}
/**
* Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is
* a simplified version for link previews).
@ -289,16 +357,8 @@ const parseMetadata = (
'icon',
'apple-touch-icon',
]);
let imageHref: null | string;
if (rawImageHref) {
try {
imageHref = new URL(rawImageHref, href).href;
} catch (err) {
imageHref = null;
}
} else {
imageHref = null;
}
const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null;
const imageHref = imageUrl ? imageUrl.href : null;
let date: number | null = null;
const rawDate = getOpenGraphContent(document, [
@ -346,12 +406,11 @@ export async function fetchLinkPreviewMetadata(
): Promise<null | LinkPreviewMetadata> {
let response: Response;
try {
response = await fetchFn(href, {
response = await fetchWithRedirects(fetchFn, href, {
headers: {
Accept: 'text/html,application/xhtml+xml',
'User-Agent': 'WhatsApp',
},
redirect: 'follow',
signal: abortSignal,
});
} catch (err) {
@ -444,12 +503,11 @@ export async function fetchLinkPreviewImage(
): Promise<null | LinkPreviewImage> {
let response: Response;
try {
response = await fetchFn(href, {
response = await fetchWithRedirects(fetchFn, href, {
headers: {
'User-Agent': 'WhatsApp',
},
size: MAX_IMAGE_CONTENT_LENGTH,
redirect: 'follow',
signal: abortSignal,
});
} catch (err) {

View File

@ -3,7 +3,7 @@ import * as sinon from 'sinon';
import * as fs from 'fs';
import * as path from 'path';
import AbortController from 'abort-controller';
import { MIMEType } from '../../types/MIME';
import { MIMEType, IMAGE_JPEG } from '../../types/MIME';
import {
fetchLinkPreviewImage,
@ -178,9 +178,9 @@ describe('link preview fetching', () => {
);
});
it("returns null if the response status code isn't 2xx or 3xx", async () => {
it("returns null if the response status code isn't 2xx", async () => {
await Promise.all(
[100, 400, 404, 500, 0, -200].map(async status => {
[100, 304, 400, 404, 500, 0, -200].map(async status => {
const fakeFetch = stub().resolves(makeResponse({ status }));
assert.isNull(
@ -199,7 +199,7 @@ describe('link preview fetching', () => {
);
});
it('asks fetch to follow redirects', async () => {
it("doesn't use fetch's automatic redirection behavior", async () => {
const fakeFetch = stub().resolves(makeResponse());
await fetchLinkPreviewMetadata(
@ -211,10 +211,160 @@ describe('link preview fetching', () => {
sinon.assert.calledWith(
fakeFetch,
'https://example.com',
sinon.match({ redirect: 'follow' })
sinon.match({ redirect: 'manual' })
);
});
// Run the same pair of checks for each redirect status code.
[301, 302, 303, 307, 308].forEach(status => {
  it(`handles ${status} redirects`, async () => {
    // The first response redirects to /2; the second is a plain `makeResponse()`.
    const redirectResponse = makeResponse({
      status,
      headers: { Location: 'https://example.com/2' },
      body: null,
    });
    const fetchStub = stub();
    fetchStub.onFirstCall().resolves(redirectResponse);
    fetchStub.onSecondCall().resolves(makeResponse());

    const metadata = await fetchLinkPreviewMetadata(
      fetchStub,
      'https://example.com',
      new AbortController().signal
    );

    assert.deepEqual(metadata, {
      title: 'test title',
      description: null,
      date: null,
      imageHref: null,
    });

    // One request for the original URL, one for the redirect target.
    sinon.assert.calledTwice(fetchStub);
    sinon.assert.calledWith(fetchStub.getCall(0), 'https://example.com');
    sinon.assert.calledWith(fetchStub.getCall(1), 'https://example.com/2');
  });

  it(`returns null when seeing a ${status} status with no Location header`, async () => {
    const fetchStub = stub().resolves(makeResponse({ status }));

    const metadata = await fetchLinkPreviewMetadata(
      fetchStub,
      'https://example.com',
      new AbortController().signal
    );

    assert.isNull(metadata);
  });
});
it('handles relative redirects', async () => {
  // Two relative hops: an absolute-path redirect, then a path-relative one.
  const firstRedirect = makeResponse({
    status: 301,
    headers: { Location: '/2/' },
    body: null,
  });
  const secondRedirect = makeResponse({
    status: 301,
    headers: { Location: '3' },
    body: null,
  });

  const fetchStub = stub();
  fetchStub.onFirstCall().resolves(firstRedirect);
  fetchStub.onSecondCall().resolves(secondRedirect);
  fetchStub.onThirdCall().resolves(makeResponse());

  const metadata = await fetchLinkPreviewMetadata(
    fetchStub,
    'https://example.com',
    new AbortController().signal
  );

  assert.deepEqual(metadata, {
    title: 'test title',
    description: null,
    date: null,
    imageHref: null,
  });

  // Each Location must have been resolved against the URL requested just before it.
  sinon.assert.calledThrice(fetchStub);
  sinon.assert.calledWith(fetchStub.getCall(0), 'https://example.com');
  sinon.assert.calledWith(fetchStub.getCall(1), 'https://example.com/2/');
  sinon.assert.calledWith(fetchStub.getCall(2), 'https://example.com/2/3');
});
it('returns null if redirecting to an insecure HTTP URL', async () => {
  const insecureRedirect = makeResponse({
    status: 301,
    headers: { Location: 'http://example.com' },
    body: null,
  });
  const fetchStub = stub().resolves(insecureRedirect);

  const metadata = await fetchLinkPreviewMetadata(
    fetchStub,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  // The HTTP target must never be requested, so only the initial call happens.
  sinon.assert.calledOnce(fetchStub);
});
it("returns null if there's a redirection loop", async () => {
  // /start redirects to /2/, which redirects straight back to /start.
  const toSecondPage = makeResponse({
    status: 301,
    headers: { Location: '/2/' },
    body: null,
  });
  const backToStart = makeResponse({
    status: 301,
    headers: { Location: '/start' },
    body: null,
  });

  const fetchStub = stub();
  fetchStub.onFirstCall().resolves(toSecondPage);
  fetchStub.onSecondCall().resolves(backToStart);

  const metadata = await fetchLinkPreviewMetadata(
    fetchStub,
    'https://example.com/start',
    new AbortController().signal
  );

  assert.isNull(metadata);
  // The already-visited /start must not be requested a second time.
  sinon.assert.calledTwice(fetchStub);
});
it('returns null if redirecting more than 20 times', async () => {
  // Every response redirects to a fresh random path, so the loop check never fires
  // and only the request cap can end the chain.
  const redirectToRandomPath = async () =>
    makeResponse({
      status: 301,
      headers: { Location: `/${Math.random()}` },
      body: null,
    });
  const fetchStub = stub().callsFake(redirectToRandomPath);

  const metadata = await fetchLinkPreviewMetadata(
    fetchStub,
    'https://example.com/start',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.callCount(fetchStub, 20);
});
it('returns null if the response has no body', async () => {
const fakeFetch = stub().resolves(makeResponse({ body: null }));
@ -990,7 +1140,7 @@ describe('link preview fetching', () => {
);
});
it("returns null if the response status code isn't 2xx or 3xx", async () => {
it("returns null if the response status code isn't 2xx", async () => {
const fixture = await readFixture('kitten-1-64-64.jpg');
await Promise.all(
@ -1021,6 +1171,48 @@ describe('link preview fetching', () => {
);
});
// Most of the redirect behavior is tested above.
it('handles 301 redirects', async () => {
  const fixture = await readFixture('kitten-1-64-64.jpg');

  // First response: a body-less redirect to the real image location.
  const redirectResponse = new Response(null, {
    status: 301,
    headers: {
      Location: '/result.jpg',
    },
  });
  // Second response: the JPEG fixture itself.
  const imageResponse = new Response(fixture, {
    headers: {
      'Content-Type': IMAGE_JPEG,
      'Content-Length': fixture.length.toString(),
    },
  });

  const fetchStub = stub();
  fetchStub.onFirstCall().resolves(redirectResponse);
  fetchStub.onSecondCall().resolves(imageResponse);

  const image = await fetchLinkPreviewImage(
    fetchStub,
    'https://example.com/img',
    new AbortController().signal
  );

  assert.deepEqual(image, {
    data: fixture.buffer,
    contentType: IMAGE_JPEG,
  });

  sinon.assert.calledTwice(fetchStub);
  sinon.assert.calledWith(fetchStub.getCall(0), 'https://example.com/img');
  sinon.assert.calledWith(
    fetchStub.getCall(1),
    'https://example.com/result.jpg'
  );
});
it('returns null if the response is too small', async () => {
const fakeFetch = stub().resolves(
new Response(await readFixture('kitten-1-64-64.jpg'), {

View File

@ -13152,7 +13152,7 @@
"rule": "DOM-innerHTML",
"path": "ts/linkPreviews/linkPreviewFetch.js",
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
"lineNumber": 164,
"lineNumber": 212,
"reasonCategory": "usageTrusted",
"updated": "2020-09-09T21:20:16.643Z",
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
@ -13161,7 +13161,7 @@
"rule": "DOM-innerHTML",
"path": "ts/linkPreviews/linkPreviewFetch.ts",
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
"lineNumber": 215,
"lineNumber": 283,
"reasonCategory": "usageTrusted",
"updated": "2020-09-09T21:20:16.643Z",
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
@ -13344,4 +13344,4 @@
"reasonCategory": "falseMatch",
"updated": "2020-09-08T23:07:22.682Z"
}
]
]