Optimize number of range requests in updater

This commit is contained in:
Fedor Indutny 2022-03-01 14:44:29 -08:00 committed by GitHub
parent 1a751c07fc
commit 19441cd3f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 409 additions and 93 deletions

View File

@ -908,6 +908,28 @@ Signal Desktop makes use of the following open source projects.
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
## dicer
Copyright Brian White. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
## emoji-datasource
The MIT License (MIT)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -95,6 +95,7 @@
"config": "1.28.1",
"copy-text-to-clipboard": "2.1.0",
"dashdash": "1.14.1",
"dicer": "0.3.1",
"emoji-datasource": "7.0.2",
"emoji-datasource-apple": "7.0.2",
"emoji-regex": "9.2.2",
@ -199,6 +200,7 @@
"@types/config": "0.0.39",
"@types/dashdash": "1.14.0",
"@types/debug": "4.1.7",
"@types/dicer": "0.2.2",
"@types/filesize": "3.6.0",
"@types/fs-extra": "5.0.5",
"@types/google-libphonenumber": "7.4.23",
@ -215,7 +217,7 @@
"@types/mkdirp": "0.5.2",
"@types/mocha": "9.0.0",
"@types/mustache": "4.1.2",
"@types/node": "14.14.37",
"@types/node": "16.11.26",
"@types/node-fetch": "2.5.7",
"@types/node-forge": "0.9.5",
"@types/normalize-path": "3.0.0",

View File

@ -15,8 +15,10 @@ async function main() {
const originalPath = path.join(FIXTURES, 'diff-original.bin');
await fs.writeFile(originalPath, original);
// Add a broken byte to help create useful blockmaps
original[Math.floor(Math.random() * original.length)] = 0;
// Add a few broken bytes to help create useful blockmaps
for (let i = 0; i < 3; i += 1) {
original[Math.floor(Math.random() * original.length)] = 0;
}
const modifiedPath = path.join(FIXTURES, 'diff-modified.bin');
await fs.writeFile(modifiedPath, original);

View File

@ -17,6 +17,7 @@ import {
} from '../../updater/differential';
const FIXTURES = path.join(__dirname, '..', '..', '..', 'fixtures');
const CRLF = '\r\n';
describe('updater/differential', () => {
describe('computeDiff', () => {
@ -60,8 +61,8 @@ describe('updater/differential', () => {
const newFile = 'diff-modified.bin';
const newBlockFile = getBlockMapFileName(newFile);
const newHash =
'1+eipIhsN0KhpXQdRnXnGzdBCP3sgYqIXf+WK/KDK08' +
'VvH0acjX9PGf+ilIVYYWsOqp02lxrdx4gXW7V+RZY5w==';
'oEXIz7JVN1phjmumPLVQuwSYa+tHLEn5/a+q9w/pbk' +
'bnCaXAioWrAIq1P9HeqNQ0Lpsb4mWey632DUPnUXqfiw==';
const allowedFiles = new Set([
oldFile,
@ -82,26 +83,52 @@ describe('updater/differential', () => {
return;
}
const range = req.headers.range?.match(/^bytes=(\d+)-(\d+)$/);
const fullFile = await fs.readFile(path.join(FIXTURES, file));
let content = await fs.readFile(path.join(FIXTURES, file));
const totalSize = content.length;
if (range) {
content = content.slice(
parseInt(range[1], 10),
parseInt(range[2], 10) + 1
);
res.setHeader(
'content-range',
`bytes ${range[1]}-${range[2]}/${totalSize}`
);
res.writeHead(206);
} else {
const rangeHeader = req.headers.range?.match(/^bytes=([\d,\s-]+)$/);
if (!rangeHeader) {
res.writeHead(200);
res.end(fullFile);
return;
}
res.end(content);
const ranges = rangeHeader[1].split(/\s*,\s*/g).map(value => {
const range = value.match(/^(\d+)-(\d+)$/);
strictAssert(range, `Invalid header: ${rangeHeader}`);
return [parseInt(range[1], 10), parseInt(range[2], 10)];
});
const BOUNDARY = 'f8f254ce1ba37627';
res.setHeader(
'content-type',
`multipart/byteranges; boundary=${BOUNDARY}`
);
res.writeHead(206);
const totalSize = fullFile.length;
const multipart = Buffer.concat([
...ranges
.map(([from, to]) => [
Buffer.from(
[
`--${BOUNDARY}`,
'Content-Type: binary/octet-stream',
`Content-Range: bytes ${from}-${to}/${totalSize}`,
'',
'',
].join(CRLF)
),
fullFile.slice(from, to + 1),
Buffer.from(CRLF),
])
.flat(),
Buffer.from(`--${BOUNDARY}--${CRLF}`),
]);
res.end(multipart);
});
server.unref();
@ -126,20 +153,44 @@ describe('updater/differential', () => {
sha512: newHash,
});
assert.strictEqual(data.downloadSize, 32768);
assert.strictEqual(data.downloadSize, 62826);
assert.deepStrictEqual(data.diff, [
{ action: 'copy', readOffset: 0, size: 204635, writeOffset: 0 },
{ action: 'copy', size: 44288, readOffset: 0, writeOffset: 0 },
{
action: 'download',
size: 32768,
readOffset: 204635,
writeOffset: 204635,
size: 8813,
readOffset: 44288,
writeOffset: 44288,
},
{
action: 'copy',
readOffset: 237403,
size: 24741,
writeOffset: 237403,
size: 37849,
readOffset: 53101,
writeOffset: 53101,
},
{
action: 'download',
size: 21245,
readOffset: 90950,
writeOffset: 90950,
},
{
action: 'copy',
size: 116397,
readOffset: 112195,
writeOffset: 112195,
},
{
action: 'download',
size: 32768,
readOffset: 228592,
writeOffset: 228592,
},
{
action: 'copy',
size: 784,
readOffset: 261360,
writeOffset: 261360,
},
]);
});

View File

@ -0,0 +1,40 @@
// Copyright 2022 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import { assert } from 'chai';
import { EventEmitter } from 'events';
import { wrapEventEmitterOnce as once } from '../../util/wrapEventEmitterOnce';
describe('wrapEventEmitterOnce', () => {
  let ee: EventEmitter;

  beforeEach(() => {
    ee = new EventEmitter();
  });

  it('should get the event arguments', async () => {
    // Subscribe first, then emit synchronously — the wrapper must still
    // observe the event.
    const promise = once(ee, 'result');
    ee.emit('result', 1, 2, 3);

    assert.deepStrictEqual(await promise, [1, 2, 3]);
  });

  it('should handle error event', async () => {
    const promise = once(ee, 'result');
    ee.emit('error', new Error('aha!'));

    await assert.isRejected(promise, 'aha!');
  });

  it('should stop handling error event after result', async () => {
    const promise = once(ee, 'result');
    ee.emit('result', 'okay');

    assert.deepStrictEqual(await promise, ['okay']);

    // Once the target event fires, the wrapper's internal `error`
    // listener must be detached.
    assert.strictEqual(ee.listeners('error').length, 0);
  });
});

View File

@ -487,7 +487,8 @@ export abstract class Updater {
await downloadDifferentialData(
targetUpdatePath,
differentialData,
updateOnProgress ? this.throttledSendDownloadingUpdate : undefined
updateOnProgress ? this.throttledSendDownloadingUpdate : undefined,
this.logger
);
gotUpdate = true;

View File

@ -1,21 +1,27 @@
// Copyright 2022 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import type { FileHandle } from 'fs/promises';
import { readFile, open, mkdtemp, mkdir, rename, unlink } from 'fs/promises';
import { promisify } from 'util';
import { gunzip as nativeGunzip } from 'zlib';
import { tmpdir } from 'os';
import path from 'path';
import got from 'got';
import { chunk as lodashChunk } from 'lodash';
import pMap from 'p-map';
import Dicer from 'dicer';
import { strictAssert } from '../util/assert';
import { wrapEventEmitterOnce } from '../util/wrapEventEmitterOnce';
import type { LoggerType } from '../types/Logging';
import { getGotOptions } from './got';
import { checkIntegrity } from './util';
const gunzip = promisify(nativeGunzip);
const SUPPORTED_VERSION = '2';
const MAX_SINGLE_REQ_RANGES = 50; // 20 bytes per range, ~1kb total per request
const MAX_CONCURRENCY = 5;
type BlockMapFileJSONType = Readonly<{
@ -64,6 +70,15 @@ export type PrepareDownloadOptionsType = Readonly<{
sha512: string;
}>;
export type DownloadRangesOptionsType = Readonly<{
url: string;
output: FileHandle;
ranges: ReadonlyArray<DiffType>;
logger?: LoggerType;
abortSignal?: AbortSignal;
chunkStatusCallback: (chunkSize: number) => void;
}>;
export function getBlockMapFileName(fileName: string): string {
return `${fileName}.blockmap`;
}
@ -225,7 +240,8 @@ export function isValidPreparedData(
export async function download(
newFile: string,
{ diff, oldFile, newUrl, sha512 }: PrepareDownloadResultType,
statusCallback?: (downloadedSize: number) => void
statusCallback?: (downloadedSize: number) => void,
logger?: LoggerType
): Promise<void> {
const input = await open(oldFile, 'r');
@ -235,71 +251,53 @@ export async function download(
const output = await open(tempFile, 'w');
// Share agent
const gotOptions = getGotOptions();
const copyActions = diff.filter(({ action }) => action === 'copy');
let downloadedSize = 0;
let isAborted = false;
const copyPromise: Promise<unknown> = Promise.all(
copyActions.map(async ({ readOffset, size, writeOffset }) => {
const chunk = Buffer.alloc(size);
const { bytesRead } = await input.read(
chunk,
0,
chunk.length,
readOffset
);
strictAssert(
bytesRead === size,
`Not enough data to read from offset=${readOffset} size=${size}`
);
await output.write(chunk, 0, chunk.length, writeOffset);
})
);
const downloadActions = diff.filter(({ action }) => action === 'download');
const abortController = new AbortController();
const { signal: abortSignal } = abortController;
try {
await pMap(
diff,
async ({ action, readOffset, size, writeOffset }) => {
if (action === 'copy') {
const chunk = Buffer.alloc(size);
const { bytesRead } = await input.read(
chunk,
0,
chunk.length,
readOffset
);
let downloadedSize = 0;
strictAssert(
bytesRead === size,
`Not enough data to read from offset=${readOffset} size=${size}`
);
await output.write(chunk, 0, chunk.length, writeOffset);
return;
}
strictAssert(action === 'download', 'invalid action type');
const stream = got.stream(`${newUrl}`, {
...gotOptions,
headers: {
range: `bytes=${readOffset}-${readOffset + size - 1}`,
},
});
stream.once('response', ({ statusCode }) => {
if (statusCode !== 206) {
stream.destroy(new Error(`Invalid status code: ${statusCode}`));
}
});
let lastOffset = writeOffset;
for await (const chunk of stream) {
strictAssert(
lastOffset - writeOffset + chunk.length <= size,
'Server returned more data than expected'
);
await output.write(chunk, 0, chunk.length, lastOffset);
lastOffset += chunk.length;
downloadedSize += chunk.length;
if (!isAborted) {
await Promise.all([
copyPromise,
downloadRanges({
url: newUrl,
output,
ranges: downloadActions,
logger,
abortSignal,
chunkStatusCallback(chunkSize) {
downloadedSize += chunkSize;
if (!abortSignal.aborted) {
statusCallback?.(downloadedSize);
}
}
strictAssert(
lastOffset - writeOffset === size,
`Not enough data to download from offset=${readOffset} size=${size}`
);
},
{ concurrency: MAX_CONCURRENCY }
);
},
}),
]);
} catch (error) {
isAborted = true;
abortController.abort();
throw error;
} finally {
await Promise.all([input.close(), output.close()]);
@ -316,3 +314,148 @@ export async function download(
}
await rename(tempFile, newFile);
}
/**
 * Downloads the given `download` diff entries from `url` using HTTP range
 * requests and writes each received chunk into `output` at the entry's
 * `writeOffset`.
 *
 * Many ranges are coalesced into a single `Range: bytes=a-b,c-d,...` request
 * (the server replies with a `multipart/byteranges` body, parsed by Dicer);
 * more than MAX_SINGLE_REQ_RANGES ranges are split across several requests
 * run with at most MAX_CONCURRENCY in parallel. Ranges the server omitted
 * from its response are retried with a follow-up request.
 *
 * Throws if the response status is not 206 or the payload does not match the
 * requested ranges.
 */
export async function downloadRanges(
  options: DownloadRangesOptionsType
): Promise<void> {
  const { ranges } = options;

  // If we have way too many ranges - split them up into multiple requests
  if (ranges.length > MAX_SINGLE_REQ_RANGES) {
    await pMap(
      lodashChunk(ranges, MAX_SINGLE_REQ_RANGES),
      subRanges =>
        downloadRanges({
          ...options,
          ranges: subRanges,
        }),
      { concurrency: MAX_CONCURRENCY }
    );
    return;
  }

  // Request multiple ranges in a single request
  const { url, output, logger, abortSignal, chunkStatusCallback } = options;

  logger?.info('updater/downloadRanges: downloading ranges', ranges.length);

  // Map from `Content-Range` header value to respective DiffType object.
  const diffByRange = new Map<string, DiffType>();
  for (const diff of ranges) {
    const { action, readOffset, size } = diff;
    strictAssert(action === 'download', 'Incorrect action type');

    // NOTE: the range is inclusive, hence `size - 1`
    diffByRange.set(`${readOffset}-${readOffset + size - 1}`, diff);
  }

  const stream = got.stream(`${url}`, {
    ...getGotOptions(),
    headers: {
      range: `bytes=${Array.from(diffByRange.keys()).join(',')}`,
    },
  });

  // Each `part` is a separate readable stream for one of the ranges
  const onPart = async (part: Dicer.PartStream): Promise<void> => {
    const diff = await takeDiffFromPart(part, diffByRange);

    let offset = 0;
    for await (const chunk of part) {
      strictAssert(
        offset + chunk.length <= diff.size,
        'Server returned more data than expected, ' +
          `written=${offset} ` +
          `newChunk=${chunk.length} ` +
          `maxSize=${diff.size}`
      );
      await output.write(chunk, 0, chunk.length, offset + diff.writeOffset);
      offset += chunk.length;

      chunkStatusCallback(chunk.length);
    }

    strictAssert(
      offset === diff.size,
      `Not enough data to download from offset=${diff.readOffset} ` +
        `size=${diff.size}`
    );
  };

  const [{ statusCode, headers }] = await wrapEventEmitterOnce(
    stream,
    'response'
  );
  strictAssert(statusCode === 206, `Invalid status code: ${statusCode}`);

  const match = headers['content-type']?.match(
    /^multipart\/byteranges;\s*boundary=([^\s;]+)/
  );
  strictAssert(match, `Invalid Content-Type: ${headers['content-type']}`);

  const dicer = new Dicer({ boundary: match[1] });

  const partPromises = new Array<Promise<void>>();
  dicer.on('part', part => partPromises.push(onPart(part)));
  dicer.once('finish', () => stream.destroy());

  // Pipe the response stream fully into dicer
  // NOTE: we can't use `pipeline` due to a dicer bug:
  // https://github.com/mscdex/dicer/issues/26
  stream.pipe(dicer);
  await wrapEventEmitterOnce(dicer, 'finish');

  // Due to the bug above we need to do a manual cleanup
  stream.unpipe(dicer);
  stream.destroy();

  // Wait for individual parts to be fully written to FS
  await Promise.all(partPromises);

  if (abortSignal?.aborted) {
    return;
  }

  // Entries still present in `diffByRange` were never returned by the
  // server (takeDiffFromPart removes each consumed entry) — retry them
  // with a follow-up request instead of failing the whole download.
  const missingRanges = Array.from(diffByRange.values());
  if (missingRanges.length === 0) {
    return;
  }

  logger?.info(
    'updater/downloadRanges: downloading missing ranges',
    diffByRange.size
  );
  return downloadRanges({
    ...options,
    ranges: missingRanges,
  });
}
// Waits for the part's headers, parses its `Content-Range`, and returns the
// matching DiffType entry — removing it from `diffByRange` so that each
// requested range is consumed at most once and leftovers can be detected.
async function takeDiffFromPart(
  part: Dicer.PartStream,
  diffByRange: Map<string, DiffType>
): Promise<DiffType> {
  const [rawHeaders] = await wrapEventEmitterOnce(part, 'header');
  const headers = rawHeaders as Record<string, Array<string>>;

  const contentRange = headers['content-range'];
  strictAssert(contentRange, 'Missing Content-Range header for the part');

  const match = contentRange.join(', ').match(/^bytes\s+(\d+-\d+)/);
  strictAssert(
    match,
    `Invalid Content-Range header for the part: "${contentRange}"`
  );

  const [, range] = match;
  const diff = diffByRange.get(range);
  strictAssert(diff, `Diff not found for range="${range}"`);
  diffByRange.delete(range);

  return diff;
}

View File

@ -0,0 +1,25 @@
// Copyright 2022 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import type { EventEmitter } from 'events';
import { once } from 'events';
/**
 * Like `events.once`, but also rejects if the emitter fires `error` before
 * `eventName`. Whichever way the race settles, the internal `error`
 * listener is detached (via AbortController), so no stray listeners remain.
 */
export async function wrapEventEmitterOnce(
  emitter: EventEmitter,
  eventName: string
): Promise<ReturnType<typeof once>> {
  const abortController = new AbortController();

  // Converts an `error` event into a rejection; aborting the signal
  // removes the listener once the main event wins the race.
  const errorToRejection = async (): Promise<ReturnType<typeof once>> => {
    const [error] = await once(emitter, 'error', {
      signal: abortController.signal,
    });
    throw error;
  };

  try {
    return await Promise.race([errorToRejection(), once(emitter, eventName)]);
  } finally {
    abortController.abort();
  }
}

View File

@ -1971,6 +1971,13 @@
dependencies:
"@types/ms" "*"
"@types/dicer@0.2.2":
version "0.2.2"
resolved "https://registry.yarnpkg.com/@types/dicer/-/dicer-0.2.2.tgz#61e3a26ea4bf41cd003fd6f7adb9c78649f753de"
integrity sha512-UPLqCYey+jn5Mf57KFDwxD/7VZYDsbYUi3iyTehLFVjlbvl/JcUTPaot8uKNYLO0EoZpey+rC/s5AF3VxfeC2Q==
dependencies:
"@types/node" "*"
"@types/eslint-scope@^3.7.0":
version "3.7.0"
resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.0.tgz#4792816e31119ebd506902a482caec4951fabd86"
@ -2003,11 +2010,12 @@
integrity sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g==
"@types/express-serve-static-core@*":
version "4.16.9"
resolved "https://registry.yarnpkg.com/@types/express-serve-static-core/-/express-serve-static-core-4.16.9.tgz#69e00643b0819b024bdede95ced3ff239bb54558"
integrity sha512-GqpaVWR0DM8FnRUJYKlWgyARoBUAVfRIeVDZQKOttLFp5SmhhF9YFIYeTPwMd/AXfxlP7xVO2dj1fGu0Q+krKQ==
version "4.17.28"
resolved "https://registry.yarnpkg.com/@types/express-serve-static-core/-/express-serve-static-core-4.17.28.tgz#c47def9f34ec81dc6328d0b1b5303d1ec98d86b8"
integrity sha512-P1BJAEAW3E2DJUlkgq4tOL3RyMunoWXqbSCygWo5ZIWTjUgN1YnaXWW4VWl/oc8vs/XoYibEGBKP0uZyF4AHig==
dependencies:
"@types/node" "*"
"@types/qs" "*"
"@types/range-parser" "*"
"@types/express@*":
@ -2200,11 +2208,16 @@
dependencies:
"@types/node" "*"
"@types/node@*", "@types/node@14.14.37", "@types/node@^14.6.2":
"@types/node@*", "@types/node@^14.6.2":
version "14.14.37"
resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.37.tgz#a3dd8da4eb84a996c36e331df98d82abd76b516e"
integrity sha512-XYmBiy+ohOR4Lh5jE379fV2IU+6Jn4g5qASinhitfyO71b/sCo6MKsMLF5tc7Zf2CE8hViVQyYSobJNke8OvUw==
"@types/node@16.11.26":
version "16.11.26"
resolved "https://registry.yarnpkg.com/@types/node/-/node-16.11.26.tgz#63d204d136c9916fb4dcd1b50f9740fe86884e47"
integrity sha512-GZ7bu5A6+4DtG7q9GsoHXy3ALcgeIHP4NnL0Vv2wu0uUB/yQex26v0tf6/na1mm0+bS9Uw+0DFex7aaKr2qawQ==
"@types/node@>=13.7.0":
version "17.0.17"
resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.17.tgz#a8ddf6e0c2341718d74ee3dc413a13a042c45a0c"
@ -2266,6 +2279,11 @@
resolved "https://registry.yarnpkg.com/@types/q/-/q-1.5.2.tgz#690a1475b84f2a884fd07cd797c00f5f31356ea8"
integrity sha512-ce5d3q03Ex0sy4R14722Rmt6MT07Ua+k4FwDfdcToYJcMKNtRVQvJ6JCAPdAmAnbRb6CsX6aYb9m96NGod9uTw==
"@types/qs@*":
version "6.9.7"
resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.7.tgz#63bb7d067db107cc1e457c303bc25d511febf6cb"
integrity sha512-FGa1F62FT09qcrueBA6qYTrJPVDzah9a+493+o2PCXsesWHIn27G98TsSMs3WPNbZIEj4+VJf6saSFpvD+3Zsw==
"@types/quill@1.3.10", "@types/quill@^1.3.10":
version "1.3.10"
resolved "https://registry.yarnpkg.com/@types/quill/-/quill-1.3.10.tgz#dc1f7b6587f7ee94bdf5291bc92289f6f0497613"
@ -5683,6 +5701,13 @@ detect-port@^1.3.0:
address "^1.0.1"
debug "^2.6.0"
dicer@0.3.1:
version "0.3.1"
resolved "https://registry.yarnpkg.com/dicer/-/dicer-0.3.1.tgz#abf28921e3475bc5e801e74e0159fd94f927ba97"
integrity sha512-ObioMtXnmjYs3aRtpIJt9rgQSPCIhKVkFPip+E9GUDyWl8N435znUxK/JfNwGZJ2wnn5JKQ7Ly3vOK5Q5dylGA==
dependencies:
streamsearch "^1.1.0"
diff@5.0.0, diff@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/diff/-/diff-5.0.0.tgz#7ed6ad76d859d030787ec35855f5b1daf31d852b"
@ -13905,6 +13930,11 @@ stream-shift@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/stream-shift/-/stream-shift-1.0.0.tgz#d5c752825e5367e786f78e18e445ea223a155952"
streamsearch@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/streamsearch/-/streamsearch-1.1.0.tgz#404dd1e2247ca94af554e841a8ef0eaa238da764"
integrity sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==
strict-uri-encode@^1.0.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz#279b225df1d582b1f54e65addd4352e18faa0713"