Is it possible to access the compressed data before decompression in HttpClient?
Looking at what @Michael did gave me the hint I was missing. After getting the compressed content, you can chain a CryptoStream, a GZipStream, and a StreamReader to read the response without loading more of it into memory than needed. The CryptoStream hashes the compressed content as it is read and decompressed. Replace the StreamReader with a FileStream and you can write the data to a file with minimal memory usage :)
using System;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
/// <summary>
/// Downloads a gzip-encoded object with automatic decompression disabled, hashes the
/// compressed bytes with MD5 while they are being decompressed, and prints the
/// server-reported hash header, the decompressed text and the computed hash.
/// </summary>
class Program
{
    static async Task Main()
    {
        string url = "https://www.googleapis.com/download/storage/v1/b/"
            + "storage-library-test-bucket/o/gzipped-text.txt?alt=media";

        // Decompression is disabled so the bytes that get hashed are exactly the bytes on the wire.
        var handler = new HttpClientHandler
        {
            AutomaticDecompression = DecompressionMethods.None
        };

        using (var client = new HttpClient(handler))
        {
            client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip");

            // ResponseHeadersRead completes as soon as the headers arrive, so the body is
            // streamed through the hash/gzip pipeline instead of being buffered up front.
            using (var response = await client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead))
            {
                // Fail with a clear HTTP error rather than trying to gunzip an error page.
                response.EnsureSuccessStatusCode();

                // GetValues throws when the header is absent; TryGetValues lets us print an empty value instead.
                string hashHeader = response.Headers.TryGetValues("X-Goog-Hash", out var hashValues)
                    ? hashValues.FirstOrDefault()
                    : null;
                Console.WriteLine($"Hash header: {hashHeader}");

                string text;
                using (var md5 = MD5.Create())
                using (var cryptoStream = new CryptoStream(await response.Content.ReadAsStreamAsync(), md5, CryptoStreamMode.Read))
                {
                    // leaveOpen keeps cryptoStream usable after the gzip layer has been disposed.
                    using (var gzipStream = new GZipStream(cryptoStream, CompressionMode.Decompress, leaveOpen: true))
                    {
                        using (var streamReader = new StreamReader(gzipStream, Encoding.UTF8))
                        {
                            text = await streamReader.ReadToEndAsync();
                        }
                    }

                    // The hash is only finalized once CryptoStream has seen the end of the
                    // compressed stream. GZipStream may stop reading before that point, and on
                    // .NET Core 3.0+ disposing a read-mode CryptoStream no longer finalizes the
                    // transform, so drain whatever is left before asking for the hash.
                    await cryptoStream.CopyToAsync(Stream.Null);

                    Console.WriteLine($"Content: {text}");
                    var md5HashBase64 = Convert.ToBase64String(md5.Hash);
                    Console.WriteLine($"MD5 of content: {md5HashBase64}");
                }
            }
        }
    }
}
Output:
Hash header: crc32c=T1s5RQ==,md5=xhF4M6pNFRDQnvaRRNVnkA==
Content: hello world
MD5 of content: xhF4M6pNFRDQnvaRRNVnkA==
V2 of Answer
After reading Jon's response and an updated answer, I have the following version. It is pretty much the same idea, but I moved the streaming into a special HttpContent that I inject. Not exactly pretty, but the idea is there.
using System;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
/// <summary>
/// Version 2 of the sample: a delegating handler swaps gzip-encoded response content for a
/// <see cref="HashingContent"/> that decompresses the body while computing the MD5 of the
/// compressed bytes, so callers read plain content and can still obtain the wire hash.
/// </summary>
class Program
{
    static async Task Main()
    {
        string url = "https://www.googleapis.com/download/storage/v1/b/"
            + "storage-library-test-bucket/o/gzipped-text.txt?alt=media";

        // Automatic decompression stays off so the interceptor sees the compressed bytes.
        var handler = new HttpClientHandler
        {
            AutomaticDecompression = DecompressionMethods.None
        };

        using (var client = new HttpClient(new Intercepter(handler)))
        {
            client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip");

            using (var response = await client.GetAsync(url))
            {
                // GetValues throws when the header is absent; TryGetValues lets us print an empty value instead.
                string hashHeader = response.Headers.TryGetValues("X-Goog-Hash", out var hashValues)
                    ? hashValues.FirstOrDefault()
                    : null;
                Console.WriteLine($"Hash header: {hashHeader}");

                HttpContent content1 = response.Content;
                byte[] content = await content1.ReadAsByteArrayAsync();
                string text = Encoding.UTF8.GetString(content);
                Console.WriteLine($"Content: {text}");

                // The interceptor only wraps gzip-encoded bodies, so check instead of hard-casting.
                if (content1 is HashingContent hashingContent)
                {
                    var md5HashBase64 = Convert.ToBase64String(hashingContent.Hash);
                    Console.WriteLine($"MD5 of content: {md5HashBase64}");
                }
                else
                {
                    Console.WriteLine("Response was not gzip-encoded; no compressed-content hash is available.");
                }
            }
        }
    }

    /// <summary>
    /// Replaces gzip-encoded response content with a <see cref="HashingContent"/> wrapper.
    /// Responses without content, or without a gzip Content-Encoding, are returned untouched.
    /// </summary>
    public class Intercepter : DelegatingHandler
    {
        public Intercepter(HttpMessageHandler innerHandler) : base(innerHandler)
        {
        }

        protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            var response = await base.SendAsync(request, cancellationToken).ConfigureAwait(false);

            HttpContent original = response.Content;
            if (original == null
                || !original.Headers.ContentEncoding.Contains("gzip", StringComparer.OrdinalIgnoreCase))
            {
                // Wrapping a non-gzip body would make GZipStream fail on the first read.
                return response;
            }

            // The wrapper takes ownership of the network stream and disposes it with the content.
            var wrapped = new HashingContent(await original.ReadAsStreamAsync().ConfigureAwait(false));

            // Carry over the content headers (Content-Type etc.), except the ones that describe
            // the compressed representation, which no longer apply to the decompressed body.
            foreach (var header in original.Headers)
            {
                bool describesCompressedBody =
                    string.Equals(header.Key, "Content-Encoding", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(header.Key, "Content-Length", StringComparison.OrdinalIgnoreCase);
                if (!describesCompressedBody)
                {
                    wrapped.Headers.TryAddWithoutValidation(header.Key, header.Value);
                }
            }

            response.Content = wrapped;
            return response;
        }
    }

    /// <summary>
    /// HTTP content that exposes the gzip-decompressed form of a stream while computing the
    /// MD5 hash of the compressed bytes that pass through it.
    /// </summary>
    public sealed class HashingContent : HttpContent
    {
        private readonly StreamContent _streamContent;
        private readonly MD5 _md5;
        private readonly CryptoStream _cryptoStream;
        private readonly GZipStream _gzipStream;

        /// <summary>Wraps <paramref name="content"/>, which must yield gzip-compressed data.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
        public HashingContent(Stream content)
        {
            if (content == null)
            {
                throw new ArgumentNullException(nameof(content));
            }

            // Pipeline: compressed stream -> MD5 (CryptoStream) -> gunzip -> consumer.
            _md5 = MD5.Create();
            _cryptoStream = new CryptoStream(content, _md5, CryptoStreamMode.Read);
            _gzipStream = new GZipStream(_cryptoStream, CompressionMode.Decompress);
            _streamContent = new StreamContent(_gzipStream);
        }

        protected override async Task SerializeToStreamAsync(Stream stream, TransportContext context)
        {
            await _streamContent.CopyToAsync(stream, context).ConfigureAwait(false);

            // GZipStream may stop before the compressed stream's end-of-stream is observed, and
            // the hash is only finalized at that point, so drain the remainder through the hasher.
            await _cryptoStream.CopyToAsync(Stream.Null).ConfigureAwait(false);
        }

        protected override bool TryComputeLength(out long length)
        {
            // The decompressed length is unknown until the body has been read.
            length = 0;
            return false;
        }

        protected override Task<Stream> CreateContentReadStreamAsync() => _streamContent.ReadAsStreamAsync();

        protected override void Dispose(bool disposing)
        {
            try
            {
                if (disposing)
                {
                    _streamContent.Dispose();
                    _gzipStream.Dispose();
                    _cryptoStream.Dispose();
                    _md5.Dispose();
                }
            }
            finally
            {
                base.Dispose(disposing);
            }
        }

        /// <summary>
        /// MD5 of the compressed bytes. Only meaningful after the content has been fully read
        /// (for example via ReadAsByteArrayAsync) and before this instance is disposed.
        /// </summary>
        public byte[] Hash => _md5.Hash;
    }
}
I managed to get a hash that matches the header by:
- creating a custom handler that inherits from HttpClientHandler
- overriding SendAsync
- reading the response as bytes via base.SendAsync
- compressing it using GZipStream
- hashing the gzipped bytes with MD5 and encoding the result as base64 (using your code)
The issue is that, as you said, "before decompression" is not really respected here.
The idea is to get this `if` working as you would like:
https://github.com/dotnet/corefx/blob/master/src/System.Net.Http.WinHttpHandler/src/System/Net/Http/WinHttpResponseParser.cs#L80-L91
it matches
/// <summary>
/// Fetches the sample object through a supplied handler and prints its content, the
/// server-reported hash header and the MD5 of the bytes received. Also exposes the MD5 and
/// gzip helpers used by the custom handler.
/// </summary>
class Program
{
    const string url = "https://www.googleapis.com/download/storage/v1/b/storage-library-test-bucket/o/gzipped-text.txt?alt=media";

    static async Task Main()
    {
        //await HashResponseContent(CreateHandler(DecompressionMethods.None));
        //await HashResponseContent(CreateHandler(DecompressionMethods.GZip));
        using (var handler = new MyHandler())
        {
            await HashResponseContent(handler);
        }
        Console.ReadLine();
    }

    /// <summary>Creates a stock handler with the given automatic decompression setting.</summary>
    private static HttpClientHandler CreateHandler(DecompressionMethods decompressionMethods)
    {
        return new HttpClientHandler { AutomaticDecompression = decompressionMethods };
    }

    /// <summary>
    /// Requests the sample URL through <paramref name="handler"/> and prints the body text,
    /// the X-Goog-Hash header and the base64 MD5 of the received bytes.
    /// </summary>
    public static async Task HashResponseContent(HttpClientHandler handler)
    {
        //Console.WriteLine($"Using AutomaticDecompression : '{handler.AutomaticDecompression}'");
        //Console.WriteLine($"Using SupportsAutomaticDecompression : '{handler.SupportsAutomaticDecompression}'");
        //Console.WriteLine($"Using Properties : '{string.Join('\n', handler.Properties.Keys.ToArray())}'");

        // disposeHandler: false - the handler belongs to the caller, so only the client is disposed here.
        using (var client = new HttpClient(handler, disposeHandler: false))
        {
            using (var response = await client.GetAsync(url))
            {
                byte[] content = await response.Content.ReadAsByteArrayAsync();
                string text = Encoding.UTF8.GetString(content);
                Console.WriteLine($"Content: {text}");

                // GetValues throws when the header is absent; TryGetValues lets us print an empty value instead.
                string hashHeader = response.Headers.TryGetValues("X-Goog-Hash", out var hashValues)
                    ? hashValues.FirstOrDefault()
                    : null;
                Console.WriteLine($"Hash header: {hashHeader}");

                // The hash used to be computed and thrown away; print it so it can be compared with the header.
                Console.WriteLine($"MD5 of content: {byteArrayToMd5(content)}");
                Console.WriteLine($"=====================================================================");
            }
        }
    }

    /// <summary>Returns the base64-encoded MD5 hash of <paramref name="content"/>.</summary>
    public static string byteArrayToMd5(byte[] content)
    {
        using (var md5 = MD5.Create())
        {
            var md5Hash = md5.ComputeHash(content);
            return Convert.ToBase64String(md5Hash);
        }
    }

    /// <summary>Returns <paramref name="contentToGzip"/> compressed as a gzip stream.</summary>
    public static byte[] Compress(byte[] contentToGzip)
    {
        using (MemoryStream resultStream = new MemoryStream())
        {
            using (MemoryStream contentStreamToGzip = new MemoryStream(contentToGzip))
            {
                // The GZipStream must be disposed before ToArray so the gzip trailer is flushed.
                using (GZipStream compressionStream = new GZipStream(resultStream, CompressionMode.Compress))
                {
                    contentStreamToGzip.CopyTo(compressionStream);
                }
            }
            return resultStream.ToArray();
        }
    }
}
/// <summary>
/// Handler that, for every response, re-compresses the received body with gzip and prints
/// the base64 MD5 of the re-compressed bytes before handing the response back unchanged.
/// </summary>
public class MyHandler : HttpClientHandler
{
    protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var response = await base.SendAsync(request, cancellationToken).ConfigureAwait(false);

        // ReadAsByteArrayAsync buffers the content, so the caller can still read the body afterwards.
        var responseContent = await response.Content.ReadAsByteArrayAsync().ConfigureAwait(false);

        // Note: this hashes a fresh gzip encoding of the body, not the bytes the server sent.
        var compressedResponse = Program.Compress(responseContent);
        var compressedResponseMd5 = Program.byteArrayToMd5(compressedResponse);
        Console.WriteLine($"recompressed response to md5 : {compressedResponseMd5}");
        return response;
    }
}
What about disabling automatic decompression, manually adding the Accept-Encoding header(s), and then decompressing after hash verification?
/// <summary>
/// Downloads the gzip-encoded object without automatic decompression, verifies the MD5 of
/// the compressed bytes against the md5 entry of the X-Goog-Hash header, and only then
/// decompresses and prints the content.
/// </summary>
private static async Task Test2()
{
    var url = @"https://www.googleapis.com/download/storage/v1/b/storage-library-test-bucket/o/gzipped-text.txt?alt=media";
    var handler = new HttpClientHandler
    {
        AutomaticDecompression = DecompressionMethods.None
    };

    using (var client = new HttpClient(handler))
    {
        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip");

        using (var response = await client.GetAsync(url))
        {
            var raw = await response.Content.ReadAsByteArrayAsync();

            // The header may be missing or split over several values; join what is there
            // instead of letting GetValues throw or looking at the first value only.
            string hashHeader = response.Headers.TryGetValues("X-Goog-Hash", out var hashValues)
                ? string.Join(",", hashValues)
                : null;
            Debug.WriteLine($"Hash header: {hashHeader}");

            string md5HashBase64;
            using (var md5 = MD5.Create())
            {
                md5HashBase64 = Convert.ToBase64String(md5.ComputeHash(raw));
            }
            Debug.WriteLine($"MD5 of content: {md5HashBase64}");

            // Compare against the md5 entry itself (ordinal), not whatever the header happens to end with.
            bool match = string.Equals(ExtractMd5FromHashHeader(hashHeader), md5HashBase64, StringComparison.Ordinal);
            if (!match)
            {
                Debug.WriteLine("MD5 mismatch: content was not decompressed.");
                return;
            }

            using (var memInput = new MemoryStream(raw))
            {
                using (var gz = new GZipStream(memInput, CompressionMode.Decompress))
                {
                    using (var memOutput = new MemoryStream())
                    {
                        gz.CopyTo(memOutput);
                        var text = Encoding.UTF8.GetString(memOutput.ToArray());
                        Console.WriteLine($"Content: {text}");
                    }
                }
            }
        }
    }
}

// Returns the base64 value of the "md5=" entry in a comma-separated hash header
// (e.g. "crc32c=...,md5=..."), or null when the header is empty or has no md5 entry.
private static string ExtractMd5FromHashHeader(string hashHeader)
{
    const string Md5Prefix = "md5=";
    if (string.IsNullOrEmpty(hashHeader))
    {
        return null;
    }

    return hashHeader.Split(',')
        .Select(part => part.Trim())
        .Where(part => part.StartsWith(Md5Prefix, StringComparison.Ordinal))
        .Select(part => part.Substring(Md5Prefix.Length))
        .FirstOrDefault();
}