Skip to content

Commit

Permalink
Merge pull request sjdirect#69 from halex84/master
Browse files Browse the repository at this point in the history
Created the HttpWebResponseWrapper to abstract response creation...
  • Loading branch information
sjdirect committed Apr 4, 2015
2 parents c983870 + e7540be commit b5cfc16
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 2 deletions.
1 change: 1 addition & 0 deletions Abot/Abot.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
<ItemGroup>
<Compile Include="Core\AbotConfigurationSectionHandler.cs" />
<Compile Include="Core\BloomFilterCrawledUrlRepository.cs" />
<Compile Include="Poco\HttpWebResponseWrapper.cs" />
<Compile Include="Util\BloomFilter.cs" />
<Compile Include="Util\CachedMemoryMonitor.cs" />
<Compile Include="Core\Scheduler.cs" />
Expand Down
2 changes: 1 addition & 1 deletion Abot/Core/PageRequester.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public virtual CrawledPage MakeRequest(Uri uri, Func<CrawledPage, CrawlDecision>
crawledPage.RequestCompleted = DateTime.Now;
if (response != null)
{
crawledPage.HttpWebResponse = response;
crawledPage.HttpWebResponse = new HttpWebResponseWrapper(response);
CrawlDecision shouldDownloadContentDecision = shouldDownloadContent(crawledPage);
if (shouldDownloadContentDecision.Allow)
{
Expand Down
3 changes: 2 additions & 1 deletion Abot/Poco/CrawledPage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using log4net;
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Net;

namespace Abot.Poco
Expand Down Expand Up @@ -47,7 +48,7 @@ public CrawledPage(Uri uri)
/// <summary>
/// Web response from the server. NOTE: The Close() method has been called before setting this property.
/// </summary>
public HttpWebResponse HttpWebResponse { get; set; }
public HttpWebResponseWrapper HttpWebResponse { get; set; }

/// <summary>
/// The web exception that occurred during the crawl
Expand Down
120 changes: 120 additions & 0 deletions Abot/Poco/HttpWebResponseWrapper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;

namespace Abot.Poco
{
/// <summary>
/// Holds the data of an HTTP response, either copied from a real <see cref="HttpWebResponse"/>
/// or supplied directly by the caller.
/// </summary>
/// <remarks>
/// We use this wrapper class to enable using responses obtained by methods other than executing an HttpWebRequest.
/// E.g. one may use a browser control embedded in the application to get a page's content and construct an instance
/// of this class to pass it to Abot.
/// </remarks>
public class HttpWebResponseWrapper
{
    // Underlying system response; null unless the HttpWebResponse constructor was given a non-null response.
    private readonly HttpWebResponse InternalResponse;

    // Raw body bytes; only set by the custom-parameter constructor.
    private readonly byte[] Content;

    // Lazily created stream over Content; only set by the custom-parameter constructor,
    // so it is null for wrappers created through the other constructors.
    private readonly Lazy<Stream> ContentStream;

    #region Constructors

    /// <summary>Constructs a response based on the received system http response.</summary>
    /// <param name="response">
    /// The system response whose property values are copied into this wrapper.
    /// When null, every property keeps its default value.
    /// </param>
    public HttpWebResponseWrapper(HttpWebResponse response)
    {
        this.InternalResponse = response;

        if (response == null)
        {
            return;
        }

        this.StatusCode = response.StatusCode;
        this.ContentType = response.ContentType;
        this.ContentLength = response.ContentLength;
        this.Headers = response.Headers;
        this.CharacterSet = response.CharacterSet;
        this.ContentEncoding = response.ContentEncoding;
        this.Cookies = response.Cookies;
        this.IsFromCache = response.IsFromCache;
        this.IsMutuallyAuthenticated = response.IsMutuallyAuthenticated;
        this.LastModified = response.LastModified;
        this.Method = response.Method;
        this.ProtocolVersion = response.ProtocolVersion;
        this.ResponseUri = response.ResponseUri;
        this.Server = response.Server;
        this.StatusDescription = response.StatusDescription;
    }

    /// <summary>Constructs a response based on custom parameters.</summary>
    /// <remarks>Receives the parameters that must be set for Abot to work.</remarks>
    /// <param name="statusCode">Status code to report for the response.</param>
    /// <param name="contentType">Content type to report for the response.</param>
    /// <param name="content">Raw response body; may be null, in which case the content length is 0.</param>
    /// <param name="headers">Response headers; may be null.</param>
    public HttpWebResponseWrapper(HttpStatusCode statusCode, string contentType, byte[] content, NameValueCollection headers)
    {
        this.StatusCode = statusCode;
        this.Headers = headers;
        this.ContentType = contentType;
        this.ContentLength = content != null ? content.Length : 0;
        this.Content = content;
        // Defer creating the stream until it is first requested.
        this.ContentStream = new Lazy<Stream>(() => this.Content != null ? new MemoryStream(this.Content) : null);
    }

    /// <summary>Constructs an empty response to be filled later.</summary>
    public HttpWebResponseWrapper() { }

    #endregion

    #region Properties

    /// <summary>Status code returned by the server</summary>
    public HttpStatusCode StatusCode { get; set; }
    /// <summary>Server designated type of content</summary>
    public string ContentType { get; set; }
    /// <summary>Server designated length of content in bytes</summary>
    public long ContentLength { get; set; }
    /// <summary>Collection of headers in the response</summary>
    public NameValueCollection Headers { get; set; }
    /// <summary>Gets or sets the character set of the response.</summary>
    public string CharacterSet { get; set; }
    /// <summary>Gets or sets the method that is used to encode the body of the response.</summary>
    public string ContentEncoding { get; set; }
    /// <summary>Gets or sets the cookies that are associated with this response.</summary>
    public CookieCollection Cookies { get; set; }
    /// <summary>Was the response generated from the local cache?</summary>
    public bool IsFromCache { get; set; }
    /// <summary>Gets or sets a System.Boolean value that indicates whether both client and server were authenticated.</summary>
    public bool IsMutuallyAuthenticated { get; set; }
    /// <summary>Gets or sets the last date and time that the contents of the response were modified.</summary>
    public DateTime LastModified { get; set; }
    /// <summary>Gets or sets the method that is used to return the response.</summary>
    public string Method { get; set; }
    /// <summary>Gets or sets the version of the HTTP protocol that is used in the response.</summary>
    public Version ProtocolVersion { get; set; }
    /// <summary>Gets or sets the URI of the Internet resource that responded to the request.</summary>
    public Uri ResponseUri { get; set; }
    /// <summary>Gets or sets the name of the server that sent the response.</summary>
    public string Server { get; set; }
    /// <summary>Gets or sets the status description returned with the response.</summary>
    public string StatusDescription { get; set; }

    #endregion

    #region Stream Methods

    /// <summary>Gets the actual response data.</summary>
    /// <returns>
    /// The stream of the wrapped system response when there is one; otherwise a stream over the
    /// content bytes passed to the custom-parameter constructor (the same stream instance on every call);
    /// or null when this wrapper has no content.
    /// </returns>
    public Stream GetResponseStream()
    {
        if (this.InternalResponse != null)
        {
            return this.InternalResponse.GetResponseStream();
        }

        // ContentStream is only assigned by the custom-parameter constructor; without this check
        // a wrapper built via the empty constructor (or from a null response) would throw here.
        return this.ContentStream != null ? this.ContentStream.Value : null;
    }

    /// <summary>Gets the header with the given name.</summary>
    /// <param name="header">Name of the header to look up.</param>
    /// <returns>The header value from <see cref="Headers"/>, or null when <see cref="Headers"/> is null.</returns>
    public string GetResponseHeader(string header)
    {
        return this.Headers != null ? this.Headers[header] : null;
    }

    #endregion
}
}

0 comments on commit b5cfc16

Please sign in to comment.