diff --git a/lib/api/apiUtils/object/sourceChecksum.js b/lib/api/apiUtils/object/sourceChecksum.js new file mode 100644 index 0000000000..c717289325 --- /dev/null +++ b/lib/api/apiUtils/object/sourceChecksum.js @@ -0,0 +1,89 @@ +const { PassThrough } = require('stream'); +const async = require('async'); +const { jsutil } = require('arsenal'); + +const { data } = require('../../../data/wrapper'); +const ChecksumWritable = require('../../../auth/streamingV4/ChecksumWritable'); + +/** + * Sequentially GET the ordered source `dataLocator` parts into a single + * readable stream, reading them in order through `data.get`. Returns the + * PassThrough immediately; consumers should pipe it onward and observe + * `error` for any read failure along the way. + * + * @param {Array} dataLocator - ordered source parts + * @param {object} log - request logger + * @return {PassThrough} + */ +function buildSourcePartsStream(dataLocator, log) { + const passthrough = new PassThrough(); + const wrapErr = (err, part) => + Object.assign(err, { + copyPart: { key: part.key, dataStoreName: part.dataStoreName, dataStoreType: part.dataStoreType }, + }); + async.eachSeries( + dataLocator, + (part, cb) => { + const done = jsutil.once(cb); + if (part.dataStoreType === 'azure') { + // Azure's data.get writes part bytes into the provided writable + // instead of returning a Readable. Pipe a per-part PassThrough + // into the master passthrough and use its 'end' as the completion + // signal — same pattern arsenal's data.copyObject uses. 
+ const perPart = new PassThrough(); + perPart.once('error', err => done(wrapErr(err, part))); + perPart.once('end', () => done()); + perPart.pipe(passthrough, { end: false }); + return data.get(part, perPart, log, err => { + if (err) { + perPart.destroy(err); + done(wrapErr(err, part)); + } + }); + } + return data.get(part, null, log, (err, partStream) => { + if (err) { + return done(wrapErr(err, part)); + } + partStream.once('error', err => done(wrapErr(err, part))); + partStream.once('end', () => done()); + partStream.pipe(passthrough, { end: false }); + return undefined; + }); + }, + err => { + if (err) { + passthrough.destroy(err); + } else { + passthrough.end(); + } + }, + ); + return passthrough; +} + +/** + * Compute the checksum of the (range-adjusted) source bytes by streaming them + * through a ChecksumWritable sink. An empty `dataLocator` ends the stream + * immediately, yielding the empty-input digest. + * + * @param {Array} dataLocator - ordered source parts + * @param {string} algorithm - lowercase checksum algorithm name + * @param {object} log - request logger + * @param {function} cb - cb(err, { algorithm, value }) + * @return {undefined} + */ +function computeChecksumFromDataLocator(dataLocator, algorithm, log, cb) { + const onceCb = jsutil.once(cb); + const sourceStream = buildSourcePartsStream(dataLocator || [], log); + const checksumSink = new ChecksumWritable(algorithm, log); + sourceStream.once('error', err => { + checksumSink.destroy(err); + onceCb(err); + }); + checksumSink.once('error', onceCb); + checksumSink.once('finish', () => onceCb(null, { algorithm, value: checksumSink.digest })); + sourceStream.pipe(checksumSink); +} + +module.exports = { buildSourcePartsStream, computeChecksumFromDataLocator }; diff --git a/lib/api/completeMultipartUpload.js b/lib/api/completeMultipartUpload.js index a79c4b5a74..a883495e8b 100644 --- a/lib/api/completeMultipartUpload.js +++ b/lib/api/completeMultipartUpload.js @@ -52,15 +52,19 @@ const 
allChecksumXmlTags = Object.values(checksumAlgorithms).map(algo => algo.xm * does not match the stored part's ChecksumValue, return InvalidPart. * - If checksumType === 'COMPOSITE' and checksumIsDefault is false, every part * in the request body MUST include the matching Checksum field; - * missing → InvalidRequest. + * missing → InvalidRequest. (Relaxed for external backends, which store no + * per-part checksum - but a checksum the client does submit is still checked, + * and rejected, since there is no stored value to match.) * * @param {object} jsonList - parsed CompleteMultipartUpload XML * @param {array} storedParts - parts as returned by services.getMPUparts * @param {string} mpuSplitter - splitter used in part keys * @param {object} mpuChecksum - { algorithm, type, isDefault } + * @param {boolean} isExternal - external-backend MPU; relax the COMPOSITE + * per-part requirement (external parts carry no stored checksum) * @returns {Error|null} */ -function validatePerPartChecksums(jsonList, storedParts, mpuSplitter, mpuChecksum) { +function validatePerPartChecksums(jsonList, storedParts, mpuSplitter, mpuChecksum, isExternal) { const mpuAlgo = mpuChecksum.algorithm; if (!mpuAlgo) { // Legacy / pre-checksums MPU, no algorithm tracked, nothing to validate. @@ -68,7 +72,10 @@ function validatePerPartChecksums(jsonList, storedParts, mpuSplitter, mpuChecksu } const expectedTag = checksumAlgorithms[mpuAlgo] ? checksumAlgorithms[mpuAlgo].xmlTag : null; // Skip enforcement if the MPU's algorithm is unknown (shouldn't happen). - const requireForEachPart = mpuChecksum.type === 'COMPOSITE' && !mpuChecksum.isDefault && expectedTag !== null; + // External backends store no per-part checksum, so don't require one; a + // checksum the client does submit is still rejected below (no stored value). 
+ const requireForEachPart = mpuChecksum.type === 'COMPOSITE' + && !mpuChecksum.isDefault && expectedTag !== null && !isExternal; const storedByPartNumber = new Map(); storedParts.forEach(item => { @@ -462,7 +469,10 @@ function completeMultipartUpload(authInfo, request, log, callback) { type: storedMetadata.checksumType, isDefault: storedMetadata.checksumIsDefault, }; - const checksumErr = validatePerPartChecksums(jsonList, storedParts, splitter, mpuChecksum); + const isExternalMpu = !!constants.externalBackends[ + config.getLocationConstraintType(location)]; + const checksumErr = validatePerPartChecksums( + jsonList, storedParts, splitter, mpuChecksum, isExternalMpu); if (checksumErr) { log.debug('per-part checksum validation failed', { error: checksumErr, diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index 13b9628cf8..548a22d5af 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -1,5 +1,4 @@ const async = require('async'); -const { PassThrough } = require('stream'); const { errors, errorInstances, jsutil, versioning, s3middleware, s3routes } = require('arsenal'); const { validateObjectKeyLength } = s3routes.routesUtils; @@ -33,6 +32,7 @@ const { } = require('./apiUtils/integrity/validateChecksums'); const ChecksumTransform = require('../auth/streamingV4/ChecksumTransform'); const ChecksumWritable = require('../auth/streamingV4/ChecksumWritable'); +const { buildSourcePartsStream } = require('./apiUtils/object/sourceChecksum'); const kms = require('../kms/wrapper'); const versionIdUtils = versioning.VersionID; @@ -66,63 +66,6 @@ function _orphanedDataLocations(dataToDelete, newDataGetInfo) { return orphans.length > 0 ? orphans : null; } -/** - * Concatenate the source object's parts into a single Readable stream by - * reading them sequentially through `data.get`. Returns the PassThrough - * immediately; consumers should pipe it to the next stage and observe - * `error` on this stream for any read failure along the way. 
- * - * @param {Array} dataLocator - ordered source parts - * @param {object} log - request logger - * @return {PassThrough} - */ -function _pipeSourcePartsThrough(dataLocator, log) { - const passthrough = new PassThrough(); - const wrapErr = (err, part) => - Object.assign(err, { - copyPart: { key: part.key, dataStoreName: part.dataStoreName, dataStoreType: part.dataStoreType }, - }); - async.eachSeries( - dataLocator, - (part, cb) => { - const done = jsutil.once(cb); - if (part.dataStoreType === 'azure') { - // Azure's data.get writes part bytes into the provided writable - // instead of returning a Readable. Pipe a per-part PassThrough - // into the master passthrough and use its 'end' as the completion - // signal — same pattern arsenal's data.copyObject uses. - const perPart = new PassThrough(); - perPart.once('error', err => done(wrapErr(err, part))); - perPart.once('end', () => done()); - perPart.pipe(passthrough, { end: false }); - return data.get(part, perPart, log, err => { - if (err) { - perPart.destroy(err); - done(wrapErr(err, part)); - } - }); - } - return data.get(part, null, log, (err, partStream) => { - if (err) { - return done(wrapErr(err, part)); - } - partStream.once('error', err => done(wrapErr(err, part))); - partStream.once('end', () => done()); - partStream.pipe(passthrough, { end: false }); - return undefined; - }); - }, - err => { - if (err) { - passthrough.destroy(err); - } else { - passthrough.end(); - } - }, - ); - return passthrough; -} - /** * Decide whether the destination's checksum needs to be recomputed by * streaming the source bytes through a ChecksumTransform. 
@@ -207,7 +150,7 @@ function _recomputeChecksumAndStore( algorithm: algoName, size: storeMetadataParams.size, }); - const sourceStream = _pipeSourcePartsThrough(dataLocator, log); + const sourceStream = buildSourcePartsStream(dataLocator, log); const checksumSink = new ChecksumWritable(algoName, log); const finish = jsutil.once(err => { if (err) { @@ -236,7 +179,7 @@ function _recomputeChecksumAndStore( // Stream source bytes through a ChecksumTransform and write them out as a single put. log.debug('recomputing checksum on CopyObject', { algorithm: algoName, size: storeMetadataParams.size }); - const sourceStream = _pipeSourcePartsThrough(dataLocator, log); + const sourceStream = buildSourcePartsStream(dataLocator, log); const checksumStream = new ChecksumTransform(algoName, undefined, false, log); const done = jsutil.once((err, results) => { if (err) { @@ -362,11 +305,14 @@ function _prepMetadata( }; } // Cannot copy from same source and destination if no MD - // changed and no source version id + // changed and no source version id. A requested checksum algorithm counts + // as a change (it (re)computes the object's checksum in place), matching + // AWS, so it is excluded from this guard. 
if ( sourceIsDestination && whichMetadata === 'COPY' && Object.keys(overrideMetadata).length === 0 && + !headers['x-amz-checksum-algorithm'] && !sourceVersionId ) { return { diff --git a/lib/api/objectPutCopyPart.js b/lib/api/objectPutCopyPart.js index 3f3999b212..362f6a12af 100644 --- a/lib/api/objectPutCopyPart.js +++ b/lib/api/objectPutCopyPart.js @@ -1,5 +1,5 @@ const async = require('async'); -const { errors, errorInstances, versioning, s3middleware } = require('arsenal'); +const { errors, errorInstances, versioning, s3middleware, models, jsutil } = require('arsenal'); const validateHeaders = s3middleware.validateConditionalHeaders; const collectCorsHeaders = require('../utilities/collectCorsHeaders'); @@ -17,11 +17,92 @@ const { verifyColdObjectAvailable } = require('./apiUtils/object/coldStorage'); const { validateQuotas } = require('./apiUtils/quotas/quotaUtils'); const { setSSEHeaders } = require('./apiUtils/object/sseHeaders'); const { initializeInternalLogRequestQueue, queueInternalLogRequest } = require('../utilities/serverAccessLogger'); +const { algorithms } = require('./apiUtils/integrity/validateChecksums'); +const { buildSourcePartsStream, computeChecksumFromDataLocator } = require('./apiUtils/object/sourceChecksum'); +const { config } = require('../Config'); +const kms = require('../kms/wrapper'); +const ChecksumTransform = require('../auth/streamingV4/ChecksumTransform'); const versionIdUtils = versioning.VersionID; +const { BackendInfo } = models; const skipError = new Error('skip'); +function _shouldRecomputeChecksum(request, sourceChecksum, algo) { + if (request.headers['x-amz-copy-source-range']) { + return true; + } + return !(sourceChecksum + && sourceChecksum.checksumType === 'FULL_OBJECT' + && sourceChecksum.checksumAlgorithm === algo); +} + +/** + * Copy a part and calculate its checksum in a single streaming pass. 
+ * + * @param {Array} dataLocator - ordered (range-adjusted) source parts + * @param {number} size - copied part size in bytes + * @param {object|null} sse - server-side encryption config for the dest MPU + * @param {string} destLocationConstraint - destination MPU location constraint + * @param {object} dataStoreContext - destination object data-store context + * @param {string} algo - checksum algorithm to compute + * @param {object} log - request logger + * @param {function} cb - cb(err, { locations, totalHash, checksum }) + * @return {undefined} + */ +function _copyPartStreamingWithChecksum(dataLocator, size, sse, destLocationConstraint, + dataStoreContext, algo, log, cb) { + log.debug('recomputing checksum on UploadPartCopy', { algorithm: algo, size }); + const wrapChecksumErr = err => Object.assign(err, { checksumStream: { algorithm: algo } }); + const backendInfo = new BackendInfo(config, destLocationConstraint); + const sourceStream = buildSourcePartsStream(dataLocator, log); + const checksumStream = new ChecksumTransform(algo, undefined, false, log); + const done = jsutil.once((err, result) => { + if (err) { + sourceStream.destroy(err); + checksumStream.destroy(err); + return cb(err); + } + return cb(null, result); + }); + sourceStream.once('error', done); + checksumStream.once('error', err => done(wrapChecksumErr(err))); + sourceStream.pipe(checksumStream); + const doPut = cipherBundle => + data.put(cipherBundle, checksumStream, size, dataStoreContext, backendInfo, log, + (err, dataRetrievalInfo, hashedStream) => { + if (err) { + return done(err); + } + const location = { + key: dataRetrievalInfo.key, + dataStoreName: dataRetrievalInfo.dataStoreName, + dataStoreETag: hashedStream.completedHash, + size, + }; + if (cipherBundle) { + location.sseCryptoScheme = cipherBundle.cryptoScheme; + location.sseCipheredDataKey = cipherBundle.cipheredDataKey; + location.sseAlgorithm = cipherBundle.algorithm; + location.sseMasterKeyId = cipherBundle.masterKeyId; + } + 
return done(null, { + locations: [location], + totalHash: hashedStream.completedHash, + checksum: { algorithm: algo, value: checksumStream.digest }, + }); + }); + if (sse && sse.algorithm) { + return kms.createCipherBundle(sse, log, (err, cipherBundle) => { + if (err) { + return done(err); + } + return doPut(cipherBundle); + }); + } + return doPut(null); +} + /** * PUT Part Copy during a multipart upload. * @param {AuthInfo} authInfo - Instance of AuthInfo class with @@ -220,12 +301,13 @@ function objectPutCopyPart(authInfo, request, sourceBucket, sourceLocationConstraintName, sourceObjMD, next) { return validateQuotas(request, destBucketMD, request.accountQuotas, valPutParams.requestType, request.apiMethod, sourceObjMD?.['content-length'] || 0, false, log, err => - next(err, dataLocator, destBucketMD, copyObjectSize, sourceVerId, sourceLocationConstraintName)); + next(err, dataLocator, destBucketMD, copyObjectSize, sourceVerId, + sourceLocationConstraintName, sourceObjMD)); }, // get MPU shadow bucket to get splitter based on MD version function getMpuShadowBucket(dataLocator, destBucketMD, copyObjectSize, sourceVerId, - sourceLocationConstraintName, next) { + sourceLocationConstraintName, sourceObjMD, next) { return metadata.getBucket(mpuBucketName, log, (err, mpuBucket) => { // TODO: move to `.is` once BKTCLT-9 is done and bumped in Cloudserver @@ -245,18 +327,18 @@ function objectPutCopyPart(authInfo, request, sourceBucket, } return next(null, dataLocator, destBucketMD, copyObjectSize, sourceVerId, splitter, - sourceLocationConstraintName); + sourceLocationConstraintName, sourceObjMD); }); }, // Get MPU overview object to check authorization to put a part // and to get any object location constraint info function getMpuOverviewObject(dataLocator, destBucketMD, copyObjectSize, sourceVerId, splitter, - sourceLocationConstraintName, next) { + sourceLocationConstraintName, sourceObjMD, next) { const mpuOverviewKey = 
`overview${splitter}${destObjectKey}${splitter}${uploadId}`; return metadata.getObjectMD(mpuBucketName, mpuOverviewKey, - null, log, (err, res) => { + null, log, (err, mpuOverviewMD) => { if (err) { // TODO: move to `.is` once BKTCLT-9 is done and bumped in Cloudserver if (err.NoSuchKey) { @@ -270,22 +352,23 @@ function objectPutCopyPart(authInfo, request, sourceBucket, }); return next(err); } - const initiatorID = res.initiator.ID; + const initiatorID = mpuOverviewMD.initiator.ID; const requesterID = authInfo.isRequesterAnIAMUser() ? authInfo.getArn() : authInfo.getCanonicalID(); if (initiatorID !== requesterID) { return next(errors.AccessDenied); } const destObjLocationConstraint = - res.controllingLocationConstraint; - const sseAlgo = res['x-amz-server-side-encryption']; + mpuOverviewMD.controllingLocationConstraint; + const sseAlgo = mpuOverviewMD['x-amz-server-side-encryption']; const sse = sseAlgo ? { algorithm: sseAlgo, - masterKeyId: res['x-amz-server-side-encryption-aws-kms-key-id'], + masterKeyId: mpuOverviewMD['x-amz-server-side-encryption-aws-kms-key-id'], } : null; return next(null, dataLocator, destBucketMD, destObjLocationConstraint, copyObjectSize, - sourceVerId, sourceLocationConstraintName, sse, splitter); + sourceVerId, sourceLocationConstraintName, sse, splitter, + sourceObjMD, mpuOverviewMD); }); }, function goGetData( @@ -297,12 +380,35 @@ function objectPutCopyPart(authInfo, request, sourceBucket, sourceLocationConstraintName, sse, splitter, + sourceObjMD, + mpuOverviewMD, next, ) { const originalIdentityAuthzResults = request.actionImplicitDenies; // eslint-disable-next-line no-param-reassign delete request.actionImplicitDenies; - data.uploadPartCopy( + + const algo = mpuOverviewMD.checksumAlgorithm; + const recompute = algo && _shouldRecomputeChecksum(request, sourceObjMD.checksum, algo); + // External backends need their own MPU API, so _copyPartStreamingWithChecksum (data.put) can't be used. 
+ const destIsExternal = constants.externalBackends[ + config.getLocationConstraintType(destObjLocationConstraint)]; + if (recompute && dataLocator.length > 0 && !destIsExternal) { + return _copyPartStreamingWithChecksum(dataLocator, copyObjectSize, sse, + destObjLocationConstraint, dataStoreContext, algo, log, (err, result) => { + // eslint-disable-next-line no-param-reassign + request.actionImplicitDenies = originalIdentityAuthzResults; + if (err) { + // eslint-disable-next-line no-param-reassign + request.sourceServerAccessLog && (request.sourceServerAccessLog.error = err); + return next(err, destBucketMD); + } + return next(null, destBucketMD, result.locations, result.totalHash, copyObjectSize, + sourceVerId, sse, new Date().toJSON(), splitter, mpuOverviewMD, result.checksum); + }); + } + + return data.uploadPartCopy( request, log, destBucketMD, @@ -315,24 +421,46 @@ function objectPutCopyPart(authInfo, request, sourceBucket, (error, eTag, lastModified, serverSideEncryption, locations) => { // eslint-disable-next-line no-param-reassign request.actionImplicitDenies = originalIdentityAuthzResults; - if (error) { - if (error.message === 'skip') { - return next(skipError, destBucketMD, eTag, - lastModified, sourceVerId, - serverSideEncryption); - } + const isSkip = error && error.message === 'skip'; + if (error && !isSkip) { // eslint-disable-next-line no-param-reassign request.sourceServerAccessLog && (request.sourceServerAccessLog.error = error); return next(error, destBucketMD); } - return next(null, destBucketMD, locations, eTag, - copyObjectSize, sourceVerId, serverSideEncryption, - lastModified, splitter); + + // Skip checksum compute for external backends matching UploadPart. 
+ if (recompute && !destIsExternal) { + return computeChecksumFromDataLocator(dataLocator, algo, log, (cksErr, partChecksum) => { + if (cksErr) { + // eslint-disable-next-line no-param-reassign + request.sourceServerAccessLog && (request.sourceServerAccessLog.error = cksErr); + return next(cksErr, destBucketMD); + } + if (isSkip) { + return next(skipError, destBucketMD, eTag, lastModified, sourceVerId, + serverSideEncryption, undefined, undefined, mpuOverviewMD, partChecksum); + } + return next(null, destBucketMD, locations, eTag, copyObjectSize, sourceVerId, + serverSideEncryption, lastModified, splitter, mpuOverviewMD, partChecksum); + }); + } + + // Reuse the source's stored checksum, or none for a legacy or + // external-backend MPU. + const partChecksum = (algo && !destIsExternal) + ? { algorithm: algo, value: sourceObjMD.checksum.checksumValue } + : undefined; + if (isSkip) { + return next(skipError, destBucketMD, eTag, lastModified, sourceVerId, + serverSideEncryption, undefined, undefined, mpuOverviewMD, partChecksum); + } + return next(null, destBucketMD, locations, eTag, copyObjectSize, sourceVerId, + serverSideEncryption, lastModified, splitter, mpuOverviewMD, partChecksum); }); }, function getExistingPartInfo(destBucketMD, locations, totalHash, copyObjectSize, sourceVerId, serverSideEncryption, lastModified, - splitter, next) { + splitter, mpuOverviewMD, partChecksum, next) { const partKey = `${uploadId}${constants.splitter}${paddedPartNumber}`; metadata.getObjectMD(mpuBucketName, partKey, {}, log, @@ -357,12 +485,13 @@ function objectPutCopyPart(authInfo, request, sourceBucket, } return next(null, destBucketMD, locations, totalHash, prevObjectSize, copyObjectSize, sourceVerId, - serverSideEncryption, lastModified, oldLocations, splitter); + serverSideEncryption, lastModified, oldLocations, splitter, + mpuOverviewMD, partChecksum); }); }, function storeNewPartMetadata(destBucketMD, locations, totalHash, prevObjectSize, copyObjectSize, sourceVerId, 
serverSideEncryption, - lastModified, oldLocations, splitter, next) { + lastModified, oldLocations, splitter, mpuOverviewMD, partChecksum, next) { const metaStoreParams = { partNumber: paddedPartNumber, contentMD5: totalHash, @@ -373,6 +502,10 @@ function objectPutCopyPart(authInfo, request, sourceBucket, overheadField: constants.overheadField, ownerId: destBucketMD.getOwner(), }; + if (partChecksum) { + metaStoreParams.checksumValue = partChecksum.value; + metaStoreParams.checksumAlgorithm = partChecksum.algorithm; + } return services.metadataStorePart(mpuBucketName, locations, metaStoreParams, log, err => { if (err) { @@ -382,16 +515,17 @@ function objectPutCopyPart(authInfo, request, sourceBucket, } return next(null, locations, oldLocations, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize, splitter); + prevObjectSize, copyObjectSize, splitter, + mpuOverviewMD, partChecksum); }); }, function checkCanDeleteOldLocations(partLocations, oldLocations, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize, splitter, next) { + prevObjectSize, copyObjectSize, splitter, mpuOverviewMD, partChecksum, next) { if (!oldLocations) { return next(null, oldLocations, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize); + prevObjectSize, copyObjectSize, mpuOverviewMD, partChecksum); } return services.isCompleteMPUInProgress({ bucketName: destBucketName, @@ -420,12 +554,12 @@ function objectPutCopyPart(authInfo, request, sourceBucket, } return next(null, oldLocationsToDelete, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize); + prevObjectSize, copyObjectSize, mpuOverviewMD, partChecksum); }); }, function cleanupExistingData(oldLocationsToDelete, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize, next) { + 
prevObjectSize, copyObjectSize, mpuOverviewMD, partChecksum, next) { // Clean up the old data now that new metadata (with new // data locations) has been stored if (oldLocationsToDelete) { @@ -440,15 +574,16 @@ function objectPutCopyPart(authInfo, request, sourceBucket, } return next(null, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize); + prevObjectSize, copyObjectSize, mpuOverviewMD, partChecksum); }); } return next(null, destBucketMD, totalHash, lastModified, sourceVerId, serverSideEncryption, - prevObjectSize, copyObjectSize); + prevObjectSize, copyObjectSize, mpuOverviewMD, partChecksum); }, ], (err, destBucketMD, totalHash, lastModified, sourceVerId, - serverSideEncryption, prevObjectSize, copyObjectSize) => { + serverSideEncryption, prevObjectSize, copyObjectSize, + mpuOverviewMD, partChecksum) => { const corsHeaders = collectCorsHeaders(request.headers.origin, request.method, destBucketMD); @@ -481,8 +616,17 @@ function objectPutCopyPart(authInfo, request, sourceBucket, '', new Date(lastModified) .toISOString(), '', '"', totalHash, '"', - '', - ].join(''); + ]; + // Surface the part checksum only for non-default MPUs like AWS. 
+ if (partChecksum && !mpuOverviewMD.checksumIsDefault) { + const xmlTag = algorithms[partChecksum.algorithm] + && algorithms[partChecksum.algorithm].xmlTag; + if (xmlTag) { + xml.push(`<${xmlTag}>`, partChecksum.value, ``); + } + } + xml.push(''); + const xmlStr = xml.join(''); const additionalHeaders = corsHeaders || {}; if (serverSideEncryption) { @@ -502,8 +646,11 @@ function objectPutCopyPart(authInfo, request, sourceBucket, }); monitoring.promMetrics( 'PUT', destBucketName, '200', 'putObjectCopyPart'); - return callback(null, xml, additionalHeaders); + return callback(null, xmlStr, additionalHeaders); }); } module.exports = objectPutCopyPart; +// exported for unit tests +module.exports._shouldRecomputeChecksum = _shouldRecomputeChecksum; +module.exports._copyPartStreamingWithChecksum = _copyPartStreamingWithChecksum; diff --git a/lib/api/objectPutPart.js b/lib/api/objectPutPart.js index f115b8b727..aca2222dd7 100644 --- a/lib/api/objectPutPart.js +++ b/lib/api/objectPutPart.js @@ -44,6 +44,12 @@ function _getPartKey(uploadId, splitter, paddedPartNumber) { return `${uploadId}${splitter}${paddedPartNumber}`; } +function checksumTypeMismatchErr(expected, actual) { + return errors.InvalidRequest.customizeDescription( + `Checksum Type mismatch occurred, expected checksum Type: ${expected}, ` + + `actual checksum Type: ${actual}`); +} + /** * PUT part of object during a multipart upload. Steps include: * validating metadata for authorization, bucket existence @@ -118,7 +124,9 @@ function objectPutPart(authInfo, request, streamingV4Params, log, const requestType = request.apiMethods || 'objectPutPart'; let partChecksum; let mpuChecksumAlgo; + let mpuChecksumType; let mpuChecksumIsDefault; + let clientSuppliedChecksum; return async.waterfall([ // Get the destination bucket. 
@@ -203,6 +211,7 @@ function objectPutPart(authInfo, request, streamingV4Params, log, } mpuChecksumAlgo = res.checksumAlgorithm; + mpuChecksumType = res.checksumType; mpuChecksumIsDefault = res.checksumIsDefault; const objectLocationConstraint = @@ -330,16 +339,21 @@ function objectPutPart(authInfo, request, streamingV4Params, log, if (headerChecksum && headerChecksum.error) { return next(arsenalErrorFromChecksumError(headerChecksum), destinationBucket); } + // Whether the client sent a per-part checksum header (vs. one the + // server computes implicitly). Drives whether we echo it back. + clientSuppliedChecksum = !!headerChecksum; // If the MPU specifies a non-default checksum algo and the // client sends a different algo, reject the request. if (headerChecksum && mpuChecksumAlgo && !mpuChecksumIsDefault && headerChecksum.algorithm !== mpuChecksumAlgo) { - return next(errors.InvalidRequest.customizeDescription( - `Checksum algorithm '${headerChecksum.algorithm}' is not the same ` + - `as the checksum algorithm '${mpuChecksumAlgo}' specified during ` + - 'CreateMultipartUpload.' - ), destinationBucket); + return next(checksumTypeMismatchErr(mpuChecksumAlgo, headerChecksum.algorithm), destinationBucket); + } + + // A COMPOSITE MPU's final checksum is composed from the per-part + // checksums, so every part must carry one. + if (!headerChecksum && mpuChecksumType === 'COMPOSITE') { + return next(checksumTypeMismatchErr(mpuChecksumAlgo, 'null'), destinationBucket); } const primaryAlgo = mpuChecksumAlgo || 'crc64nvme'; @@ -500,7 +514,9 @@ function objectPutPart(authInfo, request, streamingV4Params, log, 'putObjectPart'); return cb(err, null, corsHeaders); } - if (partChecksum) { + // Surface the part checksum unless it is the server-computed default. + // A client-supplied checksum, and any explicit-algorithm MPU, is still echoed - matching AWS. 
+ if (partChecksum && (!mpuChecksumIsDefault || clientSuppliedChecksum)) { const { algorithm, value } = partChecksum; corsHeaders[`x-amz-checksum-${algorithm}`] = value; } diff --git a/lib/services.js b/lib/services.js index 3d515e7f8d..5d07c79753 100644 --- a/lib/services.js +++ b/lib/services.js @@ -858,8 +858,18 @@ const services = { */ metadataStorePart(mpuBucketName, partLocations, metaStoreParams, log, cb) { assert.strictEqual(typeof mpuBucketName, 'string'); - const { partNumber, contentMD5, size, uploadId, lastModified, splitter, overheadField, ownerId } = - metaStoreParams; + const { + partNumber, + contentMD5, + size, + uploadId, + lastModified, + splitter, + overheadField, + ownerId, + checksumValue, + checksumAlgorithm, + } = metaStoreParams; const dateModified = typeof lastModified === 'string' ? lastModified : new Date().toJSON(); assert.strictEqual(typeof splitter, 'string'); const partKey = `${uploadId}${splitter}${partNumber}`; @@ -874,6 +884,10 @@ const services = { 'content-length': size, 'owner-id': ownerId, }; + if (checksumValue && checksumAlgorithm) { + omVal.checksumValue = checksumValue; + omVal.checksumAlgorithm = checksumAlgorithm; + } const params = {}; if (overheadField) { diff --git a/tests/functional/aws-node-sdk/test/object/completeMpuChecksum.js b/tests/functional/aws-node-sdk/test/object/completeMpuChecksum.js index 8ed43e9a80..e3b1fc7d64 100644 --- a/tests/functional/aws-node-sdk/test/object/completeMpuChecksum.js +++ b/tests/functional/aws-node-sdk/test/object/completeMpuChecksum.js @@ -184,6 +184,65 @@ describe('CompleteMultipartUpload final-object checksum', () => assert.strictEqual(head.ChecksumType, 'FULL_OBJECT'); }); + describe('SDK-style checksum forwarding', () => { + let fwdS3; + const checksumFields = [ + 'ChecksumCRC32', 'ChecksumCRC32C', 'ChecksumCRC64NVME', + 'ChecksumSHA1', 'ChecksumSHA256', + ]; + // explicit algorithms + the no-algorithm default (null) + const configs = ['CRC32', 'CRC32C', 'CRC64NVME', 'SHA1', 
'SHA256', null]; + + before(() => { + // WHEN_REQUIRED: the SDK sends a per-part checksum only when we + // explicitly provide one, and nothing for the default MPU. + fwdS3 = new BucketUtility('default', { + ...sigCfg, + requestChecksumCalculation: 'WHEN_REQUIRED', + responseChecksumValidation: 'WHEN_REQUIRED', + }).s3; + }); + + configs.forEach(algo => { + const label = algo || 'no algorithm (default)'; + it(`should forward the UploadPart checksum and complete (${label})`, async () => { + const key = `complete-forward-${(algo || 'default').toLowerCase()}-${Date.now()}`; + const create = await fwdS3.send(new CreateMultipartUploadCommand({ + Bucket: bucket, Key: key, + ...(algo ? { ChecksumAlgorithm: algo } : {}), + })); + + const uploadParams = { + Bucket: bucket, Key: key, UploadId: create.UploadId, + PartNumber: 1, Body: partBody, + }; + // Explicit-algo MPUs require the matching per-part checksum. + if (algo) { + uploadParams[tagField(algo)] = await algorithms[algo.toLowerCase()].digest(partBody); + } + const uploadPart = await fwdS3.send(new UploadPartCommand(uploadParams)); + + // Forward whatever checksum the UploadPart response surfaced. + const completedPart = { PartNumber: 1, ETag: uploadPart.ETag }; + checksumFields.forEach(f => { + if (uploadPart[f] !== undefined) { + completedPart[f] = uploadPart[f]; + } + }); + + const complete = await fwdS3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: create.UploadId, + MultipartUpload: { Parts: [completedPart] }, + })); + const expectedField = algo ? 
tagField(algo) : 'ChecksumCRC64NVME'; + assert( + complete[expectedField], + `expected ${expectedField} on CompleteMPU response, got: ${JSON.stringify(complete)}`, + ); + }); + }); + }); + // AWS S3 rejects any per-part // Checksum field on a default MPU (one created without an // explicit ChecksumAlgorithm) with InvalidPart — even when the diff --git a/tests/functional/aws-node-sdk/test/object/copyPartChecksum.js b/tests/functional/aws-node-sdk/test/object/copyPartChecksum.js new file mode 100644 index 0000000000..2cd8114cf9 --- /dev/null +++ b/tests/functional/aws-node-sdk/test/object/copyPartChecksum.js @@ -0,0 +1,299 @@ +const assert = require('assert'); +const { + CreateBucketCommand, + PutObjectCommand, + CreateMultipartUploadCommand, + UploadPartCommand, + UploadPartCopyCommand, + CompleteMultipartUploadCommand, + ListPartsCommand, + AbortMultipartUploadCommand, + DeleteBucketCommand, +} = require('@aws-sdk/client-s3'); + +const withV4 = require('../support/withV4'); +const BucketUtility = require('../../lib/utility/bucket-util'); +const { algorithms } = require('../../../../../lib/api/apiUtils/integrity/validateChecksums'); + +const bucket = `copypart-checksum-${Date.now()}`; +const sourceKey = 'copypart-checksum-source'; +const sourceBody = Buffer.from('UploadPartCopy checksum source content', 'utf8'); +const bigSourceKey = 'copypart-checksum-big-source'; +const bigBody = Buffer.alloc(5 * 1024 * 1024, 0x61); + +// algo -> the SDK CopyPartResult / CompletedPart field name +const field = algo => `Checksum${algo}`; +const allFields = ['CRC32', 'CRC32C', 'CRC64NVME', 'SHA1', 'SHA256'].map(field); +const digest = (algo, body) => algorithms[algo.toLowerCase()].digest(body); + +describe('UploadPartCopy checksums', () => + withV4(sigCfg => { + let s3; + let bucketUtil; + const openUploads = []; + + before(async () => { + // WHEN_REQUIRED so the SDK does not auto-attach checksums on + // CreateMPU/CompleteMPU and muddy the assertions. 
UploadPartCopy + // itself never sends a body checksum. + bucketUtil = new BucketUtility('default', { + ...sigCfg, + requestChecksumCalculation: 'WHEN_REQUIRED', + responseChecksumValidation: 'WHEN_REQUIRED', + }); + s3 = bucketUtil.s3; + await s3.send(new CreateBucketCommand({ Bucket: bucket })); + // Source stored without a checksum, so the copy always recomputes. + await s3.send(new PutObjectCommand({ Bucket: bucket, Key: sourceKey, Body: sourceBody })); + await s3.send(new PutObjectCommand({ Bucket: bucket, Key: bigSourceKey, Body: bigBody })); + }); + + after(async () => { + await Promise.all(openUploads.map(u => + s3.send(new AbortMultipartUploadCommand({ + Bucket: bucket, Key: u.key, UploadId: u.uploadId, + })).catch(() => undefined))); + await bucketUtil.empty(bucket); + await s3.send(new DeleteBucketCommand({ Bucket: bucket })); + }); + + async function createMpu(key, opts = {}) { + const res = await s3.send(new CreateMultipartUploadCommand({ + Bucket: bucket, Key: key, ...opts, + })); + openUploads.push({ key, uploadId: res.UploadId }); + return res.UploadId; + } + + function copyPart(key, uploadId, extra = {}) { + return s3.send(new UploadPartCopyCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + PartNumber: 1, CopySource: `${bucket}/${sourceKey}`, ...extra, + })); + } + + ['CRC32', 'CRC32C', 'CRC64NVME', 'SHA1', 'SHA256'].forEach(algo => { + it(`should return the recomputed ${algo} checksum in CopyPartResult`, async () => { + const key = `cpr-${algo}`; + const uploadId = await createMpu(key, { ChecksumAlgorithm: algo }); + const res = await copyPart(key, uploadId); + assert.strictEqual(res.CopyPartResult[field(algo)], await digest(algo, sourceBody)); + }); + }); + + it('should not return any checksum in CopyPartResult for a default MPU', async () => { + const key = 'cpr-default'; + const uploadId = await createMpu(key); + const res = await copyPart(key, uploadId); + assert(res.CopyPartResult.ETag); + allFields.forEach(f => 
assert.strictEqual(res.CopyPartResult[f], undefined, + `default MPU CopyPartResult should not include ${f}`)); + }); + + it('should recompute in the MPU algorithm when the source has a different one', async () => { + // Source stored with CRC32; destination MPU is SHA256 -> recompute. + const srcCrc32 = 'copypart-checksum-source-crc32'; + await s3.send(new PutObjectCommand({ + Bucket: bucket, Key: srcCrc32, Body: sourceBody, ChecksumAlgorithm: 'CRC32', + })); + const key = 'cpr-mismatch'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'SHA256' }); + const res = await copyPart(key, uploadId, { CopySource: `${bucket}/${srcCrc32}` }); + assert.strictEqual(res.CopyPartResult.ChecksumSHA256, await digest('SHA256', sourceBody)); + }); + + it('should checksum only the copied byte range', async () => { + const key = 'cpr-range'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32' }); + const res = await copyPart(key, uploadId, { CopySourceRange: 'bytes=0-3' }); + assert.strictEqual(res.CopyPartResult.ChecksumCRC32, + await digest('CRC32', sourceBody.subarray(0, 4))); + }); + + it('should checksum a 0-byte copied part', async () => { + const emptyKey = 'copypart-checksum-empty'; + await s3.send(new PutObjectCommand({ Bucket: bucket, Key: emptyKey, Body: '' })); + const key = 'cpr-empty'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32' }); + const res = await copyPart(key, uploadId, { CopySource: `${bucket}/${emptyKey}` }); + assert.strictEqual(res.CopyPartResult.ChecksumCRC32, await digest('CRC32', Buffer.alloc(0))); + }); + + [ + { algo: 'CRC32', type: 'COMPOSITE' }, + { algo: 'CRC32', type: 'FULL_OBJECT' }, + { algo: 'CRC32C', type: 'FULL_OBJECT' }, + { algo: 'SHA1', type: 'COMPOSITE' }, + { algo: 'SHA256', type: 'COMPOSITE' }, + { algo: 'CRC64NVME', type: 'FULL_OBJECT' }, + ].forEach(({ algo, type }) => { + it(`should complete an MPU with a copied part (${algo}/${type})`, async () => { + const key = 
`cmp-${algo}-${type}`; + const uploadId = await createMpu(key, { ChecksumAlgorithm: algo, ChecksumType: type }); + const copy = await copyPart(key, uploadId); + const partChecksum = copy.CopyPartResult[field(algo)]; + assert(partChecksum, `expected ${field(algo)} in CopyPartResult`); + const complete = await s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { + Parts: [{ + PartNumber: 1, + ETag: copy.CopyPartResult.ETag, + [field(algo)]: partChecksum, + }], + }, + })); + assert.strictEqual(complete.ChecksumType, type); + assert(complete[field(algo)], `expected final ${field(algo)} on CompleteMPU response`); + if (type === 'COMPOSITE') { + assert(complete[field(algo)].endsWith('-1'), + `expected -1 suffix for 1-part COMPOSITE, got ${complete[field(algo)]}`); + } + }); + }); + + it('should complete a default MPU with a copied part', async () => { + const key = 'cmp-default'; + const uploadId = await createMpu(key); + const copy = await copyPart(key, uploadId); + const complete = await s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { Parts: [{ PartNumber: 1, ETag: copy.CopyPartResult.ETag }] }, + })); + assert(complete.ChecksumCRC64NVME, 'expected default-MPU final ChecksumCRC64NVME'); + assert.strictEqual(complete.ChecksumType, 'FULL_OBJECT'); + }); + + it('should surface the copied part checksum in ListParts for an explicit MPU', async () => { + const key = 'lp-explicit'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32' }); + await copyPart(key, uploadId); + const list = await s3.send(new ListPartsCommand({ Bucket: bucket, Key: key, UploadId: uploadId })); + assert.strictEqual(list.Parts[0].ChecksumCRC32, await digest('CRC32', sourceBody)); + }); + + it('should not surface a checksum in ListParts for a default MPU', async () => { + const key = 'lp-default'; + const uploadId = await createMpu(key); + await copyPart(key, 
uploadId); + const list = await s3.send(new ListPartsCommand({ Bucket: bucket, Key: key, UploadId: uploadId })); + allFields.forEach(f => assert.strictEqual(list.Parts[0][f], undefined, + `default MPU ListParts should not include ${f}`)); + }); + + it('should complete a multi-part COMPOSITE MPU of copied parts with the -N suffix', async () => { + const key = 'cmp-multi-composite'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32', ChecksumType: 'COMPOSITE' }); + const p1 = await copyPart(key, uploadId, { PartNumber: 1, CopySource: `${bucket}/${bigSourceKey}` }); + const p2 = await copyPart(key, uploadId, { PartNumber: 2 }); + const complete = await s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { Parts: [ + { PartNumber: 1, ETag: p1.CopyPartResult.ETag, ChecksumCRC32: p1.CopyPartResult.ChecksumCRC32 }, + { PartNumber: 2, ETag: p2.CopyPartResult.ETag, ChecksumCRC32: p2.CopyPartResult.ChecksumCRC32 }, + ] }, + })); + assert.strictEqual(complete.ChecksumType, 'COMPOSITE'); + assert(complete.ChecksumCRC32.endsWith('-2'), + `expected -2 suffix for a 2-part COMPOSITE, got ${complete.ChecksumCRC32}`); + }); + + it('should complete a multi-part FULL_OBJECT MPU of copied parts with the linear digest', async () => { + const key = 'cmp-multi-full'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32', ChecksumType: 'FULL_OBJECT' }); + const p1 = await copyPart(key, uploadId, { PartNumber: 1, CopySource: `${bucket}/${bigSourceKey}` }); + const p2 = await copyPart(key, uploadId, { PartNumber: 2 }); + const complete = await s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { Parts: [ + { PartNumber: 1, ETag: p1.CopyPartResult.ETag, ChecksumCRC32: p1.CopyPartResult.ChecksumCRC32 }, + { PartNumber: 2, ETag: p2.CopyPartResult.ETag, ChecksumCRC32: p2.CopyPartResult.ChecksumCRC32 }, + ] }, + })); + 
assert.strictEqual(complete.ChecksumType, 'FULL_OBJECT'); + assert.strictEqual(complete.ChecksumCRC32, + await digest('CRC32', Buffer.concat([bigBody, sourceBody]))); + }); + + it('should reuse a matching source checksum without recomputing', async () => { + const srcKey = 'copypart-checksum-reuse-src'; + await s3.send(new PutObjectCommand({ + Bucket: bucket, Key: srcKey, Body: sourceBody, ChecksumAlgorithm: 'CRC32', + })); + const key = 'cpr-reuse'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32' }); + const res = await copyPart(key, uploadId, { CopySource: `${bucket}/${srcKey}` }); + assert.strictEqual(res.CopyPartResult.ChecksumCRC32, await digest('CRC32', sourceBody)); + }); + + it('should checksum a copied part whose source is a multi-part object', async () => { + const srcKey = 'copypart-checksum-mpu-source'; + const srcUploadId = await createMpu(srcKey); + const a = bigBody; // part 1 must be >= 5 MiB to complete the source MPU + const b = Buffer.from('multipart-source-part-B', 'utf8'); + const up1 = await s3.send(new UploadPartCommand({ + Bucket: bucket, Key: srcKey, UploadId: srcUploadId, PartNumber: 1, Body: a })); + const up2 = await s3.send(new UploadPartCommand({ + Bucket: bucket, Key: srcKey, UploadId: srcUploadId, PartNumber: 2, Body: b })); + await s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: srcKey, UploadId: srcUploadId, + MultipartUpload: { Parts: [{ PartNumber: 1, ETag: up1.ETag }, { PartNumber: 2, ETag: up2.ETag }] }, + })); + const key = 'cpr-mp-source'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32' }); + const res = await copyPart(key, uploadId, { CopySource: `${bucket}/${srcKey}` }); + assert.strictEqual(res.CopyPartResult.ChecksumCRC32, await digest('CRC32', Buffer.concat([a, b]))); + }); + + it('should complete an MPU mixing an uploaded part and a copied part', async () => { + const key = 'cmp-mixed'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32', 
ChecksumType: 'FULL_OBJECT' }); + const partBody = bigBody; // uploaded part 1 must be >= 5 MiB + const up1 = await s3.send(new UploadPartCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, PartNumber: 1, Body: partBody, ChecksumAlgorithm: 'CRC32', + })); + const cp2 = await copyPart(key, uploadId, { PartNumber: 2 }); + const complete = await s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { Parts: [ + { PartNumber: 1, ETag: up1.ETag, ChecksumCRC32: up1.ChecksumCRC32 }, + { PartNumber: 2, ETag: cp2.CopyPartResult.ETag, ChecksumCRC32: cp2.CopyPartResult.ChecksumCRC32 }, + ] }, + })); + assert.strictEqual(complete.ChecksumType, 'FULL_OBJECT'); + assert.strictEqual(complete.ChecksumCRC32, + await digest('CRC32', Buffer.concat([partBody, sourceBody]))); + }); + + it('should reject CompleteMPU on a COMPOSITE MPU when the copied part checksum is omitted', async () => { + const key = 'cmp-omit-cksum'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32', ChecksumType: 'COMPOSITE' }); + const copy = await copyPart(key, uploadId); + await assert.rejects( + s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { Parts: [{ PartNumber: 1, ETag: copy.CopyPartResult.ETag }] }, + })), + err => { + assert.strictEqual(err.name, 'InvalidRequest'); + return true; + }); + }); + + it('should reject CompleteMPU when the copied part checksum is wrong', async () => { + const key = 'cmp-wrong-cksum'; + const uploadId = await createMpu(key, { ChecksumAlgorithm: 'CRC32', ChecksumType: 'COMPOSITE' }); + const copy = await copyPart(key, uploadId); + await assert.rejects( + s3.send(new CompleteMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + MultipartUpload: { Parts: [ + { PartNumber: 1, ETag: copy.CopyPartResult.ETag, ChecksumCRC32: 'AAAAAA==' }] }, + })), + err => { + assert.strictEqual(err.name, 'InvalidPart'); + return 
true; + }); + }); + }) +); diff --git a/tests/functional/aws-node-sdk/test/object/mpuUploadPartChecksum.js b/tests/functional/aws-node-sdk/test/object/mpuUploadPartChecksum.js index 1bcf410912..4961b87a48 100644 --- a/tests/functional/aws-node-sdk/test/object/mpuUploadPartChecksum.js +++ b/tests/functional/aws-node-sdk/test/object/mpuUploadPartChecksum.js @@ -129,7 +129,15 @@ describe('UploadPart checksum validation', () => PartNumber: 3 + idx, Body: partBody, [checksumField[otherAlgo]]: correctDigest[otherAlgo], })), - { name: 'InvalidRequest' }, + err => { + assert.strictEqual(err.name, 'InvalidRequest', + `expected InvalidRequest, got ${err.name}: ${err.message}`); + // AWS names the expected (MPU) and actual (sent) algorithms. + assert.match(err.message, new RegExp( + `expected checksum Type: ${mpuAlgo.toLowerCase()}, ` + + `actual checksum Type: ${otherAlgo.toLowerCase()}`)); + return true; + }, ); }); }); @@ -175,12 +183,85 @@ describe('UploadPart checksum validation', () => }); }); - it('should accept part with no checksum header', async () => { - const res = await s3.send(new UploadPartCommand({ + it('should return no per-part checksum when none is sent', async () => { + // WHEN_REQUIRED so the SDK does not auto-attach a crc32: the + // part is genuinely uploaded with no checksum. 
+ const noCksumS3 = new BucketUtility('default', { + ...sigCfg, + requestChecksumCalculation: 'WHEN_REQUIRED', + responseChecksumValidation: 'WHEN_REQUIRED', + }).s3; + const res = await noCksumS3.send(new UploadPartCommand({ Bucket: bucket, Key: key, UploadId: uploadId, PartNumber: 2 * allAlgos.length + 1, Body: partBody, })); assert(res.ETag); + const present = ['ChecksumCRC32', 'ChecksumCRC32C', 'ChecksumCRC64NVME', + 'ChecksumSHA1', 'ChecksumSHA256'].filter(f => res[f] !== undefined); + assert.deepStrictEqual(present, [], + `default MPU UploadPart should return no checksum, got: ${present.join(', ')}`); + }); + }); + + describe('per-part checksum requirement by checksum type', () => { + // WHEN_REQUIRED so the SDK does not auto-attach a checksum, letting + // us upload a genuinely checksum-less part. + let noCksumS3; + const openUploads = []; + + before(() => { + noCksumS3 = new BucketUtility('default', { + ...sigCfg, + requestChecksumCalculation: 'WHEN_REQUIRED', + responseChecksumValidation: 'WHEN_REQUIRED', + }).s3; + }); + + after(async () => { + await Promise.all(openUploads.map(uploadId => + noCksumS3.send(new AbortMultipartUploadCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + })).catch(() => undefined))); + }); + + async function createMpu(algo, type) { + const res = await noCksumS3.send(new CreateMultipartUploadCommand({ + Bucket: bucket, Key: key, ChecksumAlgorithm: algo, ChecksumType: type, + })); + openUploads.push(res.UploadId); + return res.UploadId; + } + + ['CRC32', 'CRC32C', 'SHA1', 'SHA256'].forEach(algo => { + it(`should reject UploadPart with no checksum on a ${algo}/COMPOSITE MPU`, async () => { + const uploadId = await createMpu(algo, 'COMPOSITE'); + await assert.rejects( + noCksumS3.send(new UploadPartCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + PartNumber: 1, Body: partBody, + })), + err => { + assert.strictEqual(err.name, 'InvalidRequest', + `expected InvalidRequest, got ${err.name}: ${err.message}`); + 
assert.match(err.message, + new RegExp(`expected checksum Type: ${algo.toLowerCase()}`)); + return true; + }, + ); + }); + }); + + ['CRC32', 'CRC32C', 'CRC64NVME'].forEach(algo => { + it(`should accept UploadPart with no checksum on a ${algo}/FULL_OBJECT MPU`, async () => { + const uploadId = await createMpu(algo, 'FULL_OBJECT'); + const res = await noCksumS3.send(new UploadPartCommand({ + Bucket: bucket, Key: key, UploadId: uploadId, + PartNumber: 1, Body: partBody, + })); + assert(res.ETag); + assert(res[`Checksum${algo}`], + `expected Checksum${algo} echoed, got: ${JSON.stringify(res)}`); + }); }); }); }) diff --git a/tests/functional/aws-node-sdk/test/object/objectCopy.js b/tests/functional/aws-node-sdk/test/object/objectCopy.js index 38a73c10d2..be706a1037 100644 --- a/tests/functional/aws-node-sdk/test/object/objectCopy.js +++ b/tests/functional/aws-node-sdk/test/object/objectCopy.js @@ -2135,5 +2135,73 @@ describe('Object Copy checksum behavior', () => { assert.strictEqual(err.message, expected); } }); + + // A self-copy whose ONLY change is the checksum algorithm + // (source key === dest key, default COPY metadata directive) must be + // allowed and must recompute the checksum in place, matching AWS. + const selfCopyBody = 'in-place-checksum-change-body'; + checksumFixtures.forEach(({ algo, header, key }) => { + const sourceHeader = header === 'CRC32' ? 'SHA256' : 'CRC32'; + it(`should change an object's checksum to ${algo} via a self-copy (COPY directive)`, async () => { + await s3.send( + new PutObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + Body: selfCopyBody, + ChecksumAlgorithm: sourceHeader, + }), + ); + const expectedDigest = await Promise.resolve(algorithms[algo].digest(Buffer.from(selfCopyBody))); + + // Copy the object onto itself, changing only the checksum + // algorithm. MetadataDirective is omitted, so it defaults to + // COPY — the case the guard wrongly rejects. 
+ const copyRes = await s3.send( + new CopyObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + CopySource: `${sourceBucketName}/${sourceObjName}`, + ChecksumAlgorithm: header, + }), + ); + assert.strictEqual(copyRes.CopyObjectResult[key], expectedDigest); + assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT'); + + const headRes = await s3.send( + new HeadObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + ChecksumMode: 'ENABLED', + }), + ); + assert.strictEqual(headRes[key], expectedDigest); + assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT'); + }); + }); + + // The checksum carve-out must not open the guard for a genuine no-op: + // a self-copy with no checksum algorithm and no other change is still + // rejected. + it('should still reject a no-change self-copy (COPY directive)', async () => { + await s3.send( + new PutObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + Body: selfCopyBody, + }), + ); + try { + await s3.send( + new CopyObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + CopySource: `${sourceBucketName}/${sourceObjName}`, + }), + ); + throw new Error('Expected 400 InvalidRequest'); + } catch (err) { + checkError(err, 'InvalidRequest', 400); + } + }); }); }); diff --git a/tests/functional/raw-node/test/checksumPutObjectUploadPart.js b/tests/functional/raw-node/test/checksumPutObjectUploadPart.js index 855eb3ebe8..570b15208c 100644 --- a/tests/functional/raw-node/test/checksumPutObjectUploadPart.js +++ b/tests/functional/raw-node/test/checksumPutObjectUploadPart.js @@ -154,7 +154,7 @@ let crc64nvmeOfTrailerContent; // Create the common protocol-scenario tests for a given URL factory. // urlFn() is called lazily at test runtime so that uploadId is available. 
-function makeScenarioTests(urlFn) { +function makeScenarioTests(urlFn, { expectsImplicitChecksum = true } = {}) { before(async () => { if (!crc64nvmeOfTestContent2) { crc64nvmeOfTestContent2 = await algorithms.crc64nvme.digest(testContent2); @@ -164,6 +164,18 @@ function makeScenarioTests(urlFn) { } }); + // When no client checksum is sent, PutObject echoes the server-computed + // default crc64nvme, but a default-MPU UploadPart does not (matching AWS). + function assertImplicitChecksum(res, expected) { + if (expectsImplicitChecksum) { + assert.strictEqual(res.headers['x-amz-checksum-crc64nvme'], expected, + `expected x-amz-checksum-crc64nvme: ${expected}`); + } else { + assert.strictEqual(res.headers['x-amz-checksum-crc64nvme'], undefined, + 'default-MPU UploadPart should not echo an implicit checksum'); + } + } + itSkipIfAWS( 'should return 200 for signed sha256 in x-amz-content-sha256, no x-amz-checksum header', done => { @@ -172,8 +184,7 @@ function makeScenarioTests(urlFn) { 'content-length': testContent2.length, }, testContent2, (err, res) => { assertStatus(200)(err, res, () => { - assert.strictEqual(res.headers['x-amz-checksum-crc64nvme'], crc64nvmeOfTestContent2, - `expected x-amz-checksum-crc64nvme: ${crc64nvmeOfTestContent2}`); + assertImplicitChecksum(res, crc64nvmeOfTestContent2); done(); }); }); @@ -459,8 +470,7 @@ function makeScenarioTests(urlFn) { 'content-length': Buffer.byteLength(body), }, body, (err, res) => { assertStatus(200)(err, res, () => { - assert.strictEqual(res.headers['x-amz-checksum-crc64nvme'], crc64nvmeOfTrailerContent, - `expected x-amz-checksum-crc64nvme: ${crc64nvmeOfTrailerContent}`); + assertImplicitChecksum(res, crc64nvmeOfTrailerContent); done(); }); }); @@ -479,8 +489,7 @@ function makeScenarioTests(urlFn) { 'content-length': Buffer.byteLength(body), }, body, (err, res) => { assertStatus(200)(err, res, () => { - assert.strictEqual(res.headers['x-amz-checksum-crc64nvme'], crc64nvmeOfTrailerContent, - `expected 
x-amz-checksum-crc64nvme: ${crc64nvmeOfTrailerContent}`); + assertImplicitChecksum(res, crc64nvmeOfTrailerContent); done(); }); }); @@ -731,7 +740,8 @@ describe('UploadPart: trailer and checksum protocol scenarios', () => { }); makeScenarioTests( - () => `http://localhost:8000/${bucket}/${objectKey}?partNumber=1&uploadId=${uploadId2}` + () => `http://localhost:8000/${bucket}/${objectKey}?partNumber=1&uploadId=${uploadId2}`, + { expectsImplicitChecksum: false }, ); }); diff --git a/tests/unit/api/apiUtils/object/sourceChecksum.js b/tests/unit/api/apiUtils/object/sourceChecksum.js new file mode 100644 index 0000000000..5137700e74 --- /dev/null +++ b/tests/unit/api/apiUtils/object/sourceChecksum.js @@ -0,0 +1,166 @@ +const assert = require('assert'); +const sinon = require('sinon'); +const { Readable } = require('stream'); + +const { + buildSourcePartsStream, + computeChecksumFromDataLocator, +} = require('../../../../../lib/api/apiUtils/object/sourceChecksum'); +const dataWrapper = require('../../../../../lib/data/wrapper'); +const { algorithms } = require('../../../../../lib/api/apiUtils/integrity/validateChecksums'); +const { DummyRequestLogger } = require('../../../helpers'); + +const log = new DummyRequestLogger(); + +// Build a source-part descriptor carrying its bytes (via `value`) for the +// stubbed data.get below to serve. `getError` makes data.get fail on that part. +function part(key, value, opts = {}) { + return { + key, + value, + dataStoreName: 'mem', + dataStoreType: 'mem', + ...opts, + }; +} + +// Drain a readable and hand back the fully concatenated bytes (or the error). +function collect(stream, cb) { + const chunks = []; + stream.on('data', chunk => chunks.push(chunk)); + stream.once('error', err => cb(err)); + stream.once('end', () => cb(null, Buffer.concat(chunks))); +} + +// Promisified computeChecksumFromDataLocator for the async digest assertions. 
+function computeChecksum(dataLocator, algorithm) { + return new Promise((resolve, reject) => { + computeChecksumFromDataLocator(dataLocator, algorithm, log, (err, res) => + (err ? reject(err) : resolve(res))); + }); +} + +describe('sourceChecksum util', () => { + beforeEach(() => { + // Emulate the two data.get shapes buildSourcePartsStream relies on: + // azure writes part bytes into the provided writable; every other + // backend returns a Readable through the callback. + sinon.stub(dataWrapper.data, 'get').callsFake((p, writable, log2, cb) => { + if (p.getError) { + return cb(p.getError); + } + const bytes = Buffer.from(p.value || ''); + if (p.dataStoreType === 'azure') { + process.nextTick(() => writable.end(bytes)); + return cb(null); + } + const rs = new Readable({ read() {} }); + // Push after the caller has wired up listeners + pipe in its callback. + process.nextTick(() => { + if (bytes.length) { + rs.push(bytes); + } + rs.push(null); + }); + return cb(null, rs); + }); + }); + + afterEach(() => sinon.restore()); + + describe('buildSourcePartsStream', () => { + it('should concatenate parts in order', done => { + const locator = [part('a', 'Hello, '), part('b', 'world'), part('c', '!')]; + collect(buildSourcePartsStream(locator, log), (err, buf) => { + assert.ifError(err); + assert.strictEqual(buf.toString(), 'Hello, world!'); + done(); + }); + }); + + it('should serve azure parts (data.get writes into the provided writable)', done => { + const locator = [part('a', 'azure-bytes', { dataStoreType: 'azure', dataStoreName: 'azurebackend' })]; + collect(buildSourcePartsStream(locator, log), (err, buf) => { + assert.ifError(err); + assert.strictEqual(buf.toString(), 'azure-bytes'); + done(); + }); + }); + + it('should concatenate mixed azure and regular parts in order', done => { + const locator = [ + part('a', 'one-'), + part('b', 'two-', { dataStoreType: 'azure', dataStoreName: 'azurebackend' }), + part('c', 'three'), + ]; + 
collect(buildSourcePartsStream(locator, log), (err, buf) => { + assert.ifError(err); + assert.strictEqual(buf.toString(), 'one-two-three'); + done(); + }); + }); + + it('should emit an empty stream for an empty dataLocator', done => { + collect(buildSourcePartsStream([], log), (err, buf) => { + assert.ifError(err); + assert.strictEqual(buf.length, 0); + done(); + }); + }); + + it('should propagate a mid-stream read error wrapped with copyPart metadata', done => { + const boom = new Error('read failed'); + const locator = [ + part('ok', 'good'), + part('bad', '', { dataStoreName: 'ring0', dataStoreType: 'scality', getError: boom }), + ]; + const stream = buildSourcePartsStream(locator, log); + stream.on('data', () => {}); + stream.once('error', err => { + assert.strictEqual(err, boom); + assert.deepStrictEqual(err.copyPart, { + key: 'bad', + dataStoreName: 'ring0', + dataStoreType: 'scality', + }); + done(); + }); + }); + }); + + describe('computeChecksumFromDataLocator', () => { + const locator = [part('a', 'Hello, '), part('b', 'world'), part('c', '!')]; + const fullBytes = Buffer.from('Hello, world!'); + + // Derived from the algorithms map so a newly added algorithm is covered + // automatically. 
+ Object.keys(algorithms).forEach(algo => { + it(`should compute the ${algo} digest over the concatenated source bytes`, async () => { + const expected = await algorithms[algo].digest(fullBytes); + const result = await computeChecksum(locator, algo); + assert.strictEqual(result.algorithm, algo); + assert.strictEqual(result.value, expected); + }); + + it(`should compute the empty-input ${algo} digest for an empty dataLocator`, async () => { + const expected = await algorithms[algo].digest(Buffer.alloc(0)); + const result = await computeChecksum([], algo); + assert.strictEqual(result.value, expected); + }); + }); + + it('should surface a read error wrapped with copyPart metadata', done => { + const boom = new Error('read failed'); + const locator2 = [part('bad', '', { dataStoreName: 'ring0', dataStoreType: 'scality', getError: boom })]; + computeChecksumFromDataLocator(locator2, 'crc32', log, err => { + assert.strictEqual(err, boom); + assert.deepStrictEqual(err.copyPart, { + key: 'bad', + dataStoreName: 'ring0', + dataStoreType: 'scality', + }); + done(); + }); + }); + }); +}); diff --git a/tests/unit/api/multipartUpload.js b/tests/unit/api/multipartUpload.js index c3a24cfb7b..a899e2e6e6 100644 --- a/tests/unit/api/multipartUpload.js +++ b/tests/unit/api/multipartUpload.js @@ -38,8 +38,16 @@ const { LOCATION_NAME_CRR } = require('../../constants'); const { data } = require('../../../lib/data/wrapper'); const { metadata } = storage.metadata.inMemory.metadata; const metadataBackend = storage.metadata.inMemory.metastore; +const originalDeleteObject = metadataBackend.deleteObject; const { ds } = storage.data.inMemory.datastore; +// Several tests override metadataBackend.deleteObject and restore it only on +// their success path, so a thrown assertion would leak the override into every +// later test. Reset it before each test (root hook, runs across all describes). 
+beforeEach(() => { + metadataBackend.deleteObject = originalDeleteObject; +}); + const log = new DummyRequestLogger(); const splitter = constants.splitter; @@ -3315,6 +3323,7 @@ describe('multipart upload in ingestion bucket', () => { assert.strictEqual(headers['x-amz-version-id'], versionID); assert.strictEqual(dataClient.createMPU.lastCall.args[1]['x-amz-meta-scal-version-id'], undefined); }); + }); describe('initiateMultipartUpload with objectKeyByteLimit', () => { @@ -3431,17 +3440,6 @@ describe('objectPutPart checksum response headers', () => { done(); }); }); - - it('should return x-amz-checksum-crc64nvme response header when no checksum header is provided', done => { - const expectedCrc64nvme = '5evlCr2wyO4='; - const partRequest = _createPutPartRequest(testUploadId, '1', postBody); - - objectPutPart(authInfo, partRequest, undefined, log, (err, _hexDigest, resHeaders) => { - assert.ifError(err); - assert.strictEqual(resHeaders['x-amz-checksum-crc64nvme'], expectedCrc64nvme); - done(); - }); - }); }); describe('initiateMultipartUpload checksum headers', () => { @@ -3898,6 +3896,30 @@ describe('validatePerPartChecksums', () => { assert.strictEqual(err.message, 'InvalidPart'); }); }); + + describe('external backend MPU (isExternal=true)', () => { + // External parts store no per-part checksum, so the COMPOSITE requirement + // is relaxed - but a checksum the client submits is still rejected, since + // there is no stored value to verify it against. 
+ it('should not require a per-part checksum (external parts store none)', () => { + const mpuChecksum = { algorithm: 'crc32', type: 'COMPOSITE', isDefault: false }; + const stored = [makeStoredPart(1, null)]; + const jsonList = { Part: [makeJsonPart(1, 'etag1')] }; + const err = validatePerPartChecksums(jsonList, stored, splitter, mpuChecksum, true); + assert.ifError(err); + }); + + it('should still return InvalidPart for a client-submitted checksum (nothing to verify against)', () => { + const mpuChecksum = { algorithm: 'crc32', type: 'FULL_OBJECT', isDefault: false }; + const stored = [makeStoredPart(1, null)]; + const jsonList = { + Part: [makeJsonPart(1, 'etag1', { ChecksumCRC32: SAMPLE_DIGESTS.crc32[0] })], + }; + const err = validatePerPartChecksums(jsonList, stored, splitter, mpuChecksum, true); + assert(err); + assert.strictEqual(err.message, 'InvalidPart'); + }); + }); }); describe('CompleteMultipartUpload x-amz-checksum-type header', () => { @@ -4406,3 +4428,117 @@ describe('CompleteMultipartUpload final-object checksum response', () => { assert.strictEqual(headers['x-amz-checksum-type'], undefined); }); }); + +describe('CompleteMultipartUpload per-part validation on external backends', () => { + // External backend parts store no per-part checksum, so CompleteMPU relaxes + // the COMPOSITE per-part requirement for them - but still rejects any checksum + // the client submits, since it can't be verified. The location is flipped via + // a getLocationConstraintType stub so only that gate differs. + const dataClient = data.client; + const prevDataImplName = data.implName; + const prevConfigBackendsData = data.config.backends.data; + const versionID = versioning.VersionID.encode( + versioning.VersionID.generateVersionId('0', '')); + + before(() => { + // Simulate a backend that handles the MPU itself, so uploaded parts get + // no local shadow checksum and CompleteMPU is backend-driven. 
+        data.switch(new storage.data.MultipleBackendGateway(
+            {
+                'us-east-1': dataClient,
+                'us-east-2': dataClient,
+            },
+            metadata,
+            data.locStorageCheckFn,
+        ));
+        data.implName = 'multipleBackends';
+        data.config.backends.data = 'multiple';
+        dataClient.clientType = 'aws_s3';
+    });
+
+    after(() => { // undo every switch made in before() on the shared data wrapper
+        data.switch(dataClient);
+        data.implName = prevDataImplName;
+        data.config.backends.data = prevConfigBackendsData;
+        delete dataClient.clientType;
+    });
+
+    beforeEach(() => {
+        cleanup();
+        dataClient.createMPU = sinon.stub().yields(undefined, { uploadId: 'mock-uploadId' });
+        dataClient.uploadPart = sinon.stub().yields(undefined, {
+            dataStoreType: dataClient.clientType,
+            dataStoreETag: 'mock-part-eTag',
+        });
+        dataClient.completeMPU = sinon.stub().yields(undefined, {
+            key: objectKey,
+            eTag: 'mock-eTag',
+            dataStoreVersionId: versionID,
+            contentLength: 12,
+        });
+    });
+
+    afterEach(() => {
+        sinon.restore();
+    });
+
+    const newPutIngestBucketRequest = location => // PUT-bucket request pinned to `location`
+        new DummyRequest({
+            bucketName,
+            namespace,
+            headers: { host: `${bucketName}.s3.amazonaws.com` },
+            url: '/',
+            post: // S3 CreateBucketConfiguration document carrying the LocationConstraint
+                '<?xml version="1.0" encoding="UTF-8"?>' +
+                '<CreateBucketConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">' +
+                `<LocationConstraint>${location}</LocationConstraint>` +
+                '</CreateBucketConfiguration>',
+        });
+
+    // Create an MPU on the (external) ingest backend and upload one part. The
+    // part is stored without a per-part checksum, as external backends do.
+ async function _initiateExternalMpu({ algo = 'CRC32', type = 'COMPOSITE' } = {}) { + await _bucketPut(authInfo, newPutIngestBucketRequest('us-east-1:ingest'), log); + const initiate = new DummyRequest({ + bucketName, + namespace, + objectKey, + headers: { + host: `${bucketName}.s3.amazonaws.com`, + 'x-amz-checksum-algorithm': algo, + 'x-amz-checksum-type': type, + }, + url: `/${objectKey}?uploads`, + }); + const initRes = await util.promisify(initiateMultipartUpload)(authInfo, initiate, log); + const uploadId = (await parseStringPromise(initRes)).InitiateMultipartUploadResult.UploadId[0]; + const partReq = _createPutPartRequest(uploadId, 1, Buffer.from('part body', 'utf8')); + const eTag = await util.promisify(objectPutPart)(authInfo, partReq, undefined, log); + return { uploadId, eTag }; + } + + const _complete = completeReq => + util.promisify(cb => completeMultipartUpload(authInfo, completeReq, log, cb))(); + + describe('COMPOSITE MPU (no per-part checksum)', () => { + it('should reject on a local location', async () => { + const { uploadId, eTag } = await _initiateExternalMpu({ type: 'COMPOSITE' }); + const completeReq = _createCompleteMpuRequest(uploadId, [{ partNumber: 1, eTag }]); + sinon.stub(config, 'getLocationConstraintType').returns('scality'); + await assert.rejects(_complete(completeReq), err => { + assert.match(err.message, /InvalidRequest/); + return true; + }); + }); + + it('should complete on an external location', async () => { + const { uploadId, eTag } = await _initiateExternalMpu({ type: 'COMPOSITE' }); + const completeReq = _createCompleteMpuRequest(uploadId, [{ partNumber: 1, eTag }]); + sinon.stub(config, 'getLocationConstraintType').returns('aws_s3'); + const result = await _complete(completeReq); + assert(result, 'external COMPOSITE MPU should complete without per-part checksums'); + }); + }); + +}); diff --git a/tests/unit/api/objectCopyPart.js b/tests/unit/api/objectCopyPart.js index 46c95a0452..7b10d93076 100644 --- 
a/tests/unit/api/objectCopyPart.js +++ b/tests/unit/api/objectCopyPart.js @@ -13,6 +13,13 @@ const metadataswitch = require('../metadataswitch'); const DummyRequest = require('../DummyRequest'); const { cleanup, DummyRequestLogger, makeAuthInfo, versioningTestUtils } = require('../helpers'); +const { Readable } = require('stream'); +const { algorithms } = require('../../../lib/api/apiUtils/integrity/validateChecksums'); +const { data } = require('../../../lib/data/wrapper'); +const { config } = require('../../../lib/Config'); +const kms = require('../../../lib/kms/wrapper'); + +const checksumAlgos = Object.keys(algorithms); const log = new DummyRequestLogger(); const canonicalID = 'accessKey1'; @@ -31,12 +38,12 @@ function _createBucketPutRequest(bucketName) { }); } -function _createInitiateRequest(bucketName) { +function _createInitiateRequest(bucketName, extraHeaders) { const params = { bucketName, namespace, objectKey, - headers: { host: `${bucketName}.s3.amazonaws.com` }, + headers: { host: `${bucketName}.s3.amazonaws.com`, ...extraHeaders }, url: `/${objectKey}?uploads`, }; return new DummyRequest(params); @@ -155,3 +162,240 @@ describe('objectCopyPart', () => { }); }); }); + +describe('objectPutCopyPart._shouldRecomputeChecksum', () => { + const { _shouldRecomputeChecksum } = objectPutCopyPart; + const noRange = { headers: {} }; + const withRange = { headers: { 'x-amz-copy-source-range': 'bytes=0-1' } }; + + it('should return true when a copy-source-range is requested', () => { + assert.strictEqual(_shouldRecomputeChecksum(withRange, + { checksumType: 'FULL_OBJECT', checksumAlgorithm: 'crc32' }, 'crc32'), true); + }); + + it('should return true when the source has no checksum', () => { + assert.strictEqual(_shouldRecomputeChecksum(noRange, undefined, 'crc32'), true); + }); + + checksumAlgos.forEach(algo => { + const otherAlgo = algo === 'sha256' ? 
'crc32' : 'sha256';
+
+        it(`should return false when the source is FULL_OBJECT ${algo} matching the MPU`, () => {
+            assert.strictEqual(_shouldRecomputeChecksum(noRange,
+                { checksumType: 'FULL_OBJECT', checksumAlgorithm: algo }, algo), false);
+        });
+
+        it(`should return true when the source ${otherAlgo} differs from the MPU ${algo}`, () => {
+            assert.strictEqual(_shouldRecomputeChecksum(noRange,
+                { checksumType: 'FULL_OBJECT', checksumAlgorithm: otherAlgo }, algo), true);
+        });
+
+        it(`should return true when the source ${algo} checksum is COMPOSITE`, () => {
+            assert.strictEqual(_shouldRecomputeChecksum(noRange,
+                { checksumType: 'COMPOSITE', checksumAlgorithm: algo }, algo), true);
+        });
+    });
+});
+
+describe('objectPutCopyPart checksum storage', () => {
+    const objData = Buffer.from('foo', 'utf8');
+
+    function _initiateWithHeaders(headers, cb) { // cb(err, uploadId) for a new MPU on destBucketName
+        const req = _createInitiateRequest(destBucketName, headers);
+        return initiateMultipartUpload(authInfo, req, log, (err, res) => {
+            if (err) {
+                return cb(err);
+            }
+            return parseString(res, (parseErr, json) => // json must not be dereferenced on a parse failure
+                cb(parseErr, parseErr ? undefined : json.InitiateMultipartUploadResult.UploadId[0]));
+        });
+    }
+
+    // The part metadata is the last object written; pull its checksum fields.
+    function _storedPartChecksum() {
+        const omVal = metadataswitch.putObjectMD.lastCall.args[2];
+        return { algorithm: omVal.checksumAlgorithm, value: omVal.checksumValue };
+    }
+
+    // Initiate an MPU for `algo`, optionally inject the source object's stored
+    // checksum (to drive the reuse-vs-recompute decision), then copy a part and
+    // resolve with the checksum persisted in the part metadata.
+ function _copyPart(algo, { sourceKey = objectKey, sourceChecksum, headers } = {}) { + return new Promise((resolve, reject) => { + _initiateWithHeaders({ 'x-amz-checksum-algorithm': algo.toUpperCase() }, (err, uploadId) => { + if (err) { + return reject(err); + } + if (sourceChecksum) { + metadata.keyMaps.get(sourceBucketName).get(sourceKey).checksum = sourceChecksum; + } + const req = _createObjectCopyPartRequest(destBucketName, uploadId, headers); + return objectPutCopyPart(authInfo, req, sourceBucketName, sourceKey, undefined, log, + copyErr => (copyErr ? reject(copyErr) : resolve(_storedPartChecksum()))); + }); + }); + } + + beforeEach(done => { + cleanup(); + sinon.spy(metadataswitch, 'putObjectMD'); + async.waterfall([ + cb => bucketPut(authInfo, putDestBucketRequest, log, e => cb(e)), + cb => bucketPut(authInfo, putSourceBucketRequest, log, e => cb(e)), + cb => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData), undefined, log, e => cb(e)), + ], done); + }); + + afterEach(() => { + sinon.restore(); + cleanup(); + }); + + checksumAlgos.forEach(algo => { + const otherAlgo = algo === 'sha256' ? 
'crc32' : 'sha256';
+    const mismatch = { checksumType: 'FULL_OBJECT', checksumAlgorithm: otherAlgo, checksumValue: 'unused' };
+
+    it(`should recompute the part checksum (${algo}) when the source algorithm differs`, async () => {
+        const expected = await algorithms[algo].digest(objData);
+        assert.deepStrictEqual(await _copyPart(algo, { sourceChecksum: mismatch }),
+            { algorithm: algo, value: expected });
+    });
+
+    it(`should reuse the source checksum (${algo}) when the algorithm matches`, async () => {
+        const sourceValue = await algorithms[algo].digest(Buffer.from('bar')); // valid digest, but not objData's
+        assert.deepStrictEqual(await _copyPart(algo, { // only a reused (not recomputed) value can match
+            sourceChecksum: { checksumType: 'FULL_OBJECT', checksumAlgorithm: algo, checksumValue: sourceValue },
+        }), { algorithm: algo, value: sourceValue });
+    });
+
+    it(`should recompute over the ranged bytes (${algo}) when a copy-source-range is set`, async () => {
+        const expected = await algorithms[algo].digest(Buffer.from('fo'));
+        assert.deepStrictEqual(await _copyPart(algo, {
+            sourceChecksum: { checksumType: 'FULL_OBJECT', checksumAlgorithm: algo,
+                checksumValue: await algorithms[algo].digest(objData) },
+            headers: { 'x-amz-copy-source-range': 'bytes=0-1' },
+        }), { algorithm: algo, value: expected });
+    });
+
+    it(`should store the empty-bytes digest (${algo}) for a 0-byte source`, async () => {
+        const expected = await algorithms[algo].digest(Buffer.alloc(0));
+        const emptyKey = `empty-source-${algo}`;
+        await new Promise((resolve, reject) => objectPut(authInfo,
+            versioningTestUtils.createPutObjectRequest(sourceBucketName, emptyKey, Buffer.alloc(0)),
+            undefined, log, e => (e ?
reject(e) : resolve()))); + assert.deepStrictEqual(await _copyPart(algo, { sourceKey: emptyKey, sourceChecksum: mismatch }), + { algorithm: algo, value: expected }); + }); + }); + + it('should use the one-pass stream (not data.uploadPartCopy) for a local destination', done => { + _initiateWithHeaders({ 'x-amz-checksum-algorithm': 'CRC32' }, (err, uploadId) => { + assert.ifError(err); + metadata.keyMaps.get(sourceBucketName).get(objectKey).checksum = + { checksumType: 'FULL_OBJECT', checksumAlgorithm: 'sha256', checksumValue: 'unused' }; + const uploadPartCopySpy = sinon.spy(data, 'uploadPartCopy'); + const req = _createObjectCopyPartRequest(destBucketName, uploadId); + objectPutCopyPart(authInfo, req, sourceBucketName, objectKey, undefined, log, copyErr => { + assert.ifError(copyErr); + sinon.assert.notCalled(uploadPartCopySpy); + done(); + }); + }); + }); + + it('should route an external-backend destination through data.uploadPartCopy and return no checksum', done => { + _initiateWithHeaders({ 'x-amz-checksum-algorithm': 'CRC32' }, (err, uploadId) => { + assert.ifError(err); + metadata.keyMaps.get(sourceBucketName).get(objectKey).checksum = + { checksumType: 'FULL_OBJECT', checksumAlgorithm: 'sha256', checksumValue: 'unused' }; + // Make the destination look like an external backend (data.put can't store its parts)... + sinon.stub(config, 'getLocationConstraintType').returns('aws_s3'); + // ...and simulate the backend's native part copy returning the skip sentinel. 
+ const uploadPartCopyStub = sinon.stub(data, 'uploadPartCopy').callsFake((...args) => { + const cb = args[args.length - 1]; + return cb(new Error('skip'), 'etag', '2020-01-01T00:00:00.000Z', null, [{ dataStoreETag: 'etag' }]); + }); + const req = _createObjectCopyPartRequest(destBucketName, uploadId); + objectPutCopyPart(authInfo, req, sourceBucketName, objectKey, undefined, log, (copyErr, xml) => { + assert.ifError(copyErr); + sinon.assert.calledOnce(uploadPartCopyStub); + // external backends get no cloudserver checksum (matches UploadPart) + assert.doesNotMatch(xml, /Checksum/); + done(); + }); + }); + }); +}); + +describe('objectPutCopyPart._copyPartStreamingWithChecksum', () => { + const { _copyPartStreamingWithChecksum } = objectPutCopyPart; + const srcBytes = Buffer.from('hello-copy-part', 'utf8'); + const dataLocator = [{ key: 'srckey', dataStoreType: 'mem', dataStoreName: 'mem' }]; + + beforeEach(() => { + // Serve the source bytes for buildSourcePartsStream. + sinon.stub(data, 'get').callsFake((part, writable, log2, cb) => { + const rs = new Readable({ read() {} }); + process.nextTick(() => { + rs.push(srcBytes); + rs.push(null); + }); + return cb(null, rs); + }); + // Drain the checksum stream (so it flushes its digest) and report an md5. + sinon.stub(data, 'put').callsFake((cipherBundle, stream, size, ctx, backendInfo, log2, cb) => { + stream.on('data', () => {}); + stream.once('end', () => cb(null, { key: 'destkey', dataStoreName: 'mem' }, { completedHash: 'fakemd5' })); + }); + }); + + afterEach(() => sinon.restore()); + + function _run(sse, algo) { + return new Promise((resolve, reject) => + _copyPartStreamingWithChecksum(dataLocator, srcBytes.length, sse, 'us-east-1', {}, algo, log, + (err, result) => (err ? 
reject(err) : resolve(result)))); + } + + checksumAlgos.forEach(algo => { + it(`should return the part location, eTag and ${algo} checksum for an unencrypted copy`, async () => { + const result = await _run(null, algo); + assert.deepStrictEqual(result.locations, [{ + key: 'destkey', dataStoreName: 'mem', dataStoreETag: 'fakemd5', size: srcBytes.length, + }]); + assert.strictEqual(result.totalHash, 'fakemd5'); + assert.deepStrictEqual(result.checksum, + { algorithm: algo, value: await algorithms[algo].digest(srcBytes) }); + }); + + it(`should add the SSE cipher fields with a ${algo} checksum when the MPU is encrypted`, async () => { + const cipherBundle = { cryptoScheme: 1, cipheredDataKey: 'dk', algorithm: 'AES256', masterKeyId: 'mk' }; + sinon.stub(kms, 'createCipherBundle').callsFake((sse, log2, cb) => cb(null, cipherBundle)); + const result = await _run({ algorithm: 'AES256' }, algo); + assert.deepStrictEqual(result.locations[0], { + key: 'destkey', + dataStoreName: 'mem', + dataStoreETag: 'fakemd5', + size: srcBytes.length, + sseCryptoScheme: 1, + sseCipheredDataKey: 'dk', + sseAlgorithm: 'AES256', + sseMasterKeyId: 'mk', + }); + assert.deepStrictEqual(result.checksum, + { algorithm: algo, value: await algorithms[algo].digest(srcBytes) }); + }); + }); + + it('should surface a source read error wrapped with copyPart metadata', done => { + data.get.restore(); + const boom = new Error('read failed'); + sinon.stub(data, 'get').callsFake((part, writable, log2, cb) => cb(boom)); + _copyPartStreamingWithChecksum(dataLocator, srcBytes.length, null, 'us-east-1', {}, 'crc32', log, err => { + assert.strictEqual(err, boom); + assert.strictEqual(err.copyPart.key, 'srckey'); + done(); + }); + }); +}); diff --git a/tests/unit/api/objectPutPartChecksum.js b/tests/unit/api/objectPutPartChecksum.js index f518ca4885..e09e2884fe 100644 --- a/tests/unit/api/objectPutPartChecksum.js +++ b/tests/unit/api/objectPutPartChecksum.js @@ -123,11 +123,28 @@ describe('objectPutPart checksum 
validation', () => { }); }); - it('should accept part with no checksum on non-default MPU', done => { + it('should reject part with no checksum on a COMPOSITE MPU', done => { + // sha256 is COMPOSITE-only; a COMPOSITE MPU's final checksum is + // composed from the per-part checksums, so every part must carry + // one and AWS rejects a part sent without it. initiateMPU({ 'x-amz-checksum-algorithm': 'sha256' }, (err, uploadId) => { assert.ifError(err); // No checksum header sent const request = makePutPartRequest(uploadId, 1, partBody); + objectPutPart(authInfo, request, undefined, log, err => { + assert(err, 'Expected an error'); + assert.strictEqual(err.message, 'InvalidRequest'); + done(); + }); + }); + }); + + it('should accept part with no checksum on a FULL_OBJECT MPU', done => { + // crc64nvme is FULL_OBJECT-only; the server computes the + // full-object checksum, so a missing per-part checksum is allowed. + initiateMPU({ 'x-amz-checksum-algorithm': 'crc64nvme' }, (err, uploadId) => { + assert.ifError(err); + const request = makePutPartRequest(uploadId, 1, partBody); objectPutPart(authInfo, request, undefined, log, err => { assert.ifError(err); done(); @@ -298,4 +315,62 @@ describe('objectPutPart checksum validation', () => { }); }); }); + + describe('response checksum header', () => { + const algos = Object.keys(algorithms); + + it('should not return a checksum header on a default MPU when none is sent', done => { + initiateMPU({}, (err, uploadId) => { + assert.ifError(err); + const request = makePutPartRequest(uploadId, 1, partBody); + objectPutPart(authInfo, request, undefined, log, (err, hexDigest, corsHeaders) => { + assert.ifError(err); + algos.forEach(algo => { + assert.strictEqual(corsHeaders[`x-amz-checksum-${algo}`], undefined); + }); + // The part checksum is still stored so CompleteMPU can + // compute the final object checksum. 
+ const partMD = getPartMetadata(uploadId); + assert(partMD); + assert.strictEqual(partMD.checksumAlgorithm, 'crc64nvme'); + assert(partMD.checksumValue); + done(); + }); + }); + }); + + algos.forEach(algo => { + it(`should echo a client-supplied ${algo} checksum on a default MPU`, done => { + initiateMPU({}, (err, uploadId) => { + assert.ifError(err); + Promise.resolve(algorithms[algo].digest(partBody)).then(digest => { + const request = makePutPartRequest(uploadId, 1, partBody, { + [`x-amz-checksum-${algo}`]: digest, + }); + objectPutPart(authInfo, request, undefined, log, (err, hexDigest, corsHeaders) => { + assert.ifError(err); + assert.strictEqual(corsHeaders[`x-amz-checksum-${algo}`], digest); + done(); + }); + }).catch(done); + }); + }); + + it(`should echo the ${algo} checksum on an explicit ${algo} MPU`, done => { + initiateMPU({ 'x-amz-checksum-algorithm': algo }, (err, uploadId) => { + assert.ifError(err); + Promise.resolve(algorithms[algo].digest(partBody)).then(digest => { + const request = makePutPartRequest(uploadId, 1, partBody, { + [`x-amz-checksum-${algo}`]: digest, + }); + objectPutPart(authInfo, request, undefined, log, (err, hexDigest, corsHeaders) => { + assert.ifError(err); + assert.strictEqual(corsHeaders[`x-amz-checksum-${algo}`], digest); + done(); + }); + }).catch(done); + }); + }); + }); + }); }); diff --git a/tests/unit/lib/services.spec.js b/tests/unit/lib/services.spec.js index ba8b4499b2..3249be1c62 100644 --- a/tests/unit/lib/services.spec.js +++ b/tests/unit/lib/services.spec.js @@ -241,6 +241,65 @@ describe('services', () => { }); }); + describe('metadataStorePart checksum fields', () => { + const mpuBucketName = 'mpu-test-bucket'; + const baseParams = { + partNumber: 1, + contentMD5: 'd41d8cd98f00b204e9800998ecf8427e', + size: 5, + uploadId: 'test-upload-id', + splitter: '|', + ownerId: 'ownerCanonicalId', + }; + + let putObjectMDStub; + + beforeEach(() => { + putObjectMDStub = sinon + .stub(metadata, 'putObjectMD') + 
.callsFake((bucket, key, md, opts, reqLog, cb) => cb(null)); + }); + + it('should persist checksumValue and checksumAlgorithm when both are provided', done => { + const params = { + ...baseParams, + checksumValue: 'NSRBwg==', + checksumAlgorithm: 'crc32', + }; + + services.metadataStorePart(mpuBucketName, [], params, log, err => { + assert.ifError(err); + sinon.assert.calledOnce(putObjectMDStub); + const storedMD = putObjectMDStub.getCall(0).args[2]; + assert.strictEqual(storedMD.checksumValue, 'NSRBwg=='); + assert.strictEqual(storedMD.checksumAlgorithm, 'crc32'); + done(); + }); + }); + + it('should not persist checksum fields when none are provided', done => { + services.metadataStorePart(mpuBucketName, [], { ...baseParams }, log, err => { + assert.ifError(err); + const storedMD = putObjectMDStub.getCall(0).args[2]; + assert.strictEqual(storedMD.checksumValue, undefined); + assert.strictEqual(storedMD.checksumAlgorithm, undefined); + done(); + }); + }); + + it('should not persist checksum fields when only the value is provided', done => { + const params = { ...baseParams, checksumValue: 'NSRBwg==' }; + + services.metadataStorePart(mpuBucketName, [], params, log, err => { + assert.ifError(err); + const storedMD = putObjectMDStub.getCall(0).args[2]; + assert.strictEqual(storedMD.checksumValue, undefined); + assert.strictEqual(storedMD.checksumAlgorithm, undefined); + done(); + }); + }); + }); + describe('metadataValidateMultipart checksum fields', () => { const uploadId = 'test-upload-id'; const authInfo = makeAuthInfo('accessKey1');