In my previous post, http://www.sharepointblogs.com/mingssn/archive/2007/10/06/word-documents-to-sharepoint.aspx, I explained the idea of editing SharePoint .aspx page content directly instead of using the out-of-the-box converter. This is the complete class for importing Word HTML into SharePoint. The distinctions between SPWeb, SPSite, SPWebCollection, and SPSiteCollection can be confusing, so make sure to read the MSDN documentation.
Also, this class removes the <style> section from the HTML file...
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using Microsoft.SharePoint;
using Microsoft.SharePoint.Publishing;
namespace PPM
{
/// <summary>
/// Imports Word-generated HTML files into SharePoint publishing pages: the HTML, with its
/// &lt;style&gt; blocks stripped, is written into a content field of an .aspx page.
/// </summary>
/// <remarks>
/// One <see cref="SPSite"/> is opened lazily for <see cref="_RootSiteUrl"/> and reused by
/// every member. Call <see cref="Dispose"/> when finished so that site is released.
/// </remarks>
class PPMWeb : IDisposable
{
    // Check-in comment recorded on every page this class checks in.
    private const string CheckInComment = "Your comment here...";

    private string RootSiteUrl;
    private string PPMTargetSiteUrl;
    private SPSite Sites;
    // True when Sites was opened by this instance (and must be disposed by it);
    // false when it was supplied by the caller through the _Sites setter.
    private bool OwnsSite;

    #region properties
    /// <summary>
    /// URL of the root site collection. Changing it drops the cached site so the next
    /// access to <see cref="_Sites"/> opens the new URL.
    /// </summary>
    public string _RootSiteUrl
    {
        get { return RootSiteUrl; }
        set
        {
            if (!string.Equals(RootSiteUrl, value, StringComparison.Ordinal))
            {
                // Any cached site belongs to the previous URL.
                ReleaseSite();
                RootSiteUrl = value;
            }
        }
    }

    /// <summary>
    /// The site collection for <see cref="_RootSiteUrl"/>. It is opened on first access and
    /// cached; callers must not dispose the returned object, use <see cref="Dispose"/> instead.
    /// A site assigned through the setter is used as-is and remains owned by the caller.
    /// </summary>
    public SPSite _Sites
    {
        get
        {
            if (Sites == null)
            {
                Sites = new SPSite(this._RootSiteUrl);
                OwnsSite = true;
            }
            return Sites;
        }
        set
        {
            if (!object.ReferenceEquals(Sites, value))
            {
                ReleaseSite();
                Sites = value;
                OwnsSite = false;
            }
        }
    }

    /// <summary>All webs of <see cref="_Sites"/>.</summary>
    public SPWebCollection _Webs
    {
        get { return this._Sites.AllWebs; }
    }

    /// <summary>
    /// URL of the site that holds the target .aspx pages. The value is only stored;
    /// no member of this class reads it.
    /// </summary>
    public string _PPMAspxPageSiteUrl
    {
        get { return this.PPMTargetSiteUrl; }
        set { PPMTargetSiteUrl = value; }
    }

    /// <summary>
    /// Finds the web whose URL equals <paramref name="SiteName"/> (case-insensitive,
    /// trailing slashes ignored).
    /// </summary>
    /// <param name="SiteName">Absolute URL of the web to find.</param>
    /// <returns>
    /// The matching web, which the caller is responsible for disposing, or <c>null</c>
    /// when no web matches.
    /// </returns>
    public SPWeb this[string SiteName]
    {
        get
        {
            foreach (SPWeb web in this._Webs)
            {
                if (IsSameUrl(web.Url, SiteName))
                {
                    return web;
                }

                // Webs handed out by the AllWebs enumeration are not released automatically.
                web.Dispose();
            }
            return null;
        }
    }
    #endregion

    /// <summary>Creates an importer for the given root site collection.</summary>
    /// <param name="RootSiteUrl">URL used to open the site collection.</param>
    /// <param name="PPMAspxSiteUrl">URL stored in <see cref="_PPMAspxPageSiteUrl"/>.</param>
    public PPMWeb(string RootSiteUrl, string PPMAspxSiteUrl)
    {
        this._RootSiteUrl = RootSiteUrl;
        this._PPMAspxPageSiteUrl = PPMAspxSiteUrl;
    }

    /// <summary>
    /// Imports one HTML file into the web at <paramref name="TargetWeb"/>: the page named
    /// after the file (extension replaced by .aspx) is created if it does not exist, its
    /// content field is set to the stripped HTML, and the page is checked in.
    /// Does nothing when no web matches <paramref name="TargetWeb"/>.
    /// </summary>
    /// <param name="HtmlFileFolder">Folder that contains the HTML file.</param>
    /// <param name="HtmlFileName">File name of the HTML file, e.g. <c>report.htm</c>.</param>
    /// <param name="TargetWeb">Absolute URL of the web that receives the page.</param>
    /// <param name="PPMPageLayout">Layout used when the page has to be created.</param>
    /// <param name="PPMContentField">Name of the list-item field that receives the HTML.</param>
    public void ConvertHtmlToAspx(string HtmlFileFolder, string HtmlFileName, string TargetWeb, PageLayout PPMPageLayout, string PPMContentField)
    {
        foreach (SPWeb web in this._Webs)
        {
            try
            {
                if (!IsSameUrl(web.Url, TargetWeb))
                {
                    continue;
                }

                // Read the file before touching SharePoint so an unreadable file cannot
                // leave a freshly created, empty page behind.
                string html = GetHtmlContentWithouStyle(HtmlFileFolder, HtmlFileName);
                string aspxName = GetAspxFileName(HtmlFileName);

                PublishingPage page = GetPublishingPagebyName(aspxName, web);
                bool alreadyCheckedOut = false;
                if (page == null)
                {
                    PublishingWeb publishingWeb = PublishingWeb.GetPublishingWeb(web);
                    page = publishingWeb.GetPublishingPages().Add(aspxName, PPMPageLayout);
                    // A page that was just added is treated as already checked out to the
                    // current user, so no explicit CheckOut is issued for it.
                    alreadyCheckedOut = true;
                }

                WritePageContent(page, PPMContentField, html, alreadyCheckedOut);
                return; // web URLs are unique, nothing left to look for
            }
            finally
            {
                web.Dispose();
            }
        }
    }

    /// <summary>
    /// Replaces the content field of the existing page that corresponds to
    /// <paramref name="HtmlFileName"/> and checks the page in. Does nothing when the page
    /// does not exist in <paramref name="Web"/>.
    /// </summary>
    /// <param name="Web">Web that contains the page; it is not disposed by this method.</param>
    /// <param name="HtmlFileFolder">Folder that contains the HTML file.</param>
    /// <param name="HtmlFileName">File name of the HTML file.</param>
    /// <param name="PPMContentField">Name of the list-item field that receives the HTML.</param>
    /// <param name="AlreadyCheckedOut">
    /// <c>true</c> when the caller already holds the checkout, so none is requested here.
    /// </param>
    /// <remarks>
    /// Failures (unreadable file, checkout or update errors) are propagated to the caller
    /// instead of being silently ignored.
    /// </remarks>
    public void UpdateAspxPageContent(SPWeb Web, string HtmlFileFolder, string HtmlFileName, string PPMContentField, bool AlreadyCheckedOut)
    {
        PublishingPage AspxPage = GetPublishingPagebyName(GetAspxFileName(HtmlFileName), Web);
        if (AspxPage == null)
        {
            return;
        }

        // Read first: a missing file must not leave the page checked out.
        string html = GetHtmlContentWithouStyle(HtmlFileFolder, HtmlFileName);
        WritePageContent(AspxPage, PPMContentField, html, AlreadyCheckedOut);
    }

    /// <summary>
    /// Looks up a publishing page by file name (case-insensitive) in the given web.
    /// </summary>
    /// <param name="AspxFileName">Page file name, e.g. <c>report.aspx</c>.</param>
    /// <param name="TargetWeb">Web whose publishing pages are searched.</param>
    /// <returns>The page, or <c>null</c> when no page has that name.</returns>
    public PublishingPage GetPublishingPagebyName(string AspxFileName, SPWeb TargetWeb)
    {
        PublishingWeb publishingWeb = PublishingWeb.GetPublishingWeb(TargetWeb);
        PublishingPageCollection pages = publishingWeb.GetPublishingPages();
        foreach (PublishingPage page in pages)
        {
            if (string.Equals(page.Name, AspxFileName, StringComparison.OrdinalIgnoreCase))
            {
                return page;
            }
        }
        return null;
    }

    /// <summary>
    /// Reads an HTML file with the system default encoding and returns its text without
    /// any &lt;style&gt; blocks.
    /// </summary>
    /// <param name="HtmlFileFolder">Folder that contains the file; may be null or empty.</param>
    /// <param name="HtmlFileName">File name of the HTML file.</param>
    /// <returns>
    /// The remaining lines, each terminated by a line break so that words wrapped across
    /// lines in the source do not run together.
    /// </returns>
    /// <remarks>
    /// Detection is line based: a block starts on a line whose first tag is &lt;style&gt;
    /// (attributes and any letter case allowed) and ends on the line containing
    /// &lt;/style&gt;. Those lines are dropped entirely.
    /// </remarks>
    public string GetHtmlContentWithouStyle(string HtmlFileFolder, string HtmlFileName)
    {
        // A null folder used to behave like an empty one under plain concatenation.
        string path = Path.Combine(HtmlFileFolder ?? string.Empty, HtmlFileName);
        StringBuilder HtmlContent = new StringBuilder();
        bool insideStyle = false;

        using (StreamReader rdr = new StreamReader(path, System.Text.Encoding.Default))
        {
            string line;
            while ((line = rdr.ReadLine()) != null)
            {
                string trimmed = line.Trim();
                if (insideStyle)
                {
                    if (ContainsStyleCloseTag(trimmed, 0))
                    {
                        insideStyle = false;
                    }
                    continue;
                }

                if (IsStyleOpenTag(trimmed))
                {
                    // A block opened and closed on the same line never enters the state.
                    insideStyle = !ContainsStyleCloseTag(trimmed, 6);
                    continue;
                }

                HtmlContent.AppendLine(line);
            }
        }
        return HtmlContent.ToString();
    }

    /// <summary>Finds a page layout of the site collection by its exact title.</summary>
    /// <param name="PPMPageLayoutTitle">Title of the layout to find.</param>
    /// <returns>The layout, or <c>null</c> when no layout has that title.</returns>
    public PageLayout GetPPMPageLayout(string PPMPageLayoutTitle)
    {
        PublishingSite publishingSite = new PublishingSite(this._Sites);
        foreach (PageLayout pageLayout in publishingSite.PageLayouts)
        {
            if (pageLayout.Title == PPMPageLayoutTitle)
            {
                return pageLayout;
            }
        }
        return null;
    }

    /// <summary>Releases the site collection opened by this instance, if any.</summary>
    public void Dispose()
    {
        ReleaseSite();
    }

    // Forgets the cached site, disposing it only when this instance opened it.
    private void ReleaseSite()
    {
        if (Sites != null && OwnsSite)
        {
            Sites.Dispose();
        }
        Sites = null;
        OwnsSite = false;
    }

    // Writes the HTML into the page's field and checks the page in, taking a checkout first
    // unless the caller already holds one. On failure a held checkout is still checked in
    // (best effort) and the original exception is rethrown.
    private static void WritePageContent(PublishingPage page, string contentField, string html, bool alreadyCheckedOut)
    {
        bool checkedOut = alreadyCheckedOut;
        try
        {
            if (!checkedOut)
            {
                page.CheckOut();
                checkedOut = true;
            }
            page.ListItem[contentField] = html;
            page.Update();
        }
        catch
        {
            if (checkedOut)
            {
                try
                {
                    page.CheckIn(CheckInComment);
                }
                catch (Exception)
                {
                    // Deliberately ignored: the failure that brought us here is the one
                    // the caller needs to see.
                }
            }
            throw;
        }
        page.CheckIn(CheckInComment);
    }

    // Maps an HTML file name to its page name by swapping the extension for ".aspx"
    // (handles .htm, .html and any letter case).
    private static string GetAspxFileName(string htmlFileName)
    {
        return Path.ChangeExtension(htmlFileName, ".aspx");
    }

    // Compares two URLs ignoring letter case and trailing slashes; null never matches.
    private static bool IsSameUrl(string left, string right)
    {
        if (left == null || right == null)
        {
            return false;
        }
        return string.Equals(left.TrimEnd('/'), right.TrimEnd('/'), StringComparison.OrdinalIgnoreCase);
    }

    // True when the (already trimmed) line begins with a <style> tag, with or without attributes.
    private static bool IsStyleOpenTag(string trimmedLine)
    {
        if (!trimmedLine.StartsWith("<style", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }
        if (trimmedLine.Length == 6)
        {
            return true; // tag continues on the next line
        }
        char next = trimmedLine[6];
        return next == '>' || char.IsWhiteSpace(next);
    }

    // True when the line contains a closing </style tag at or after startIndex.
    private static bool ContainsStyleCloseTag(string trimmedLine, int startIndex)
    {
        if (startIndex >= trimmedLine.Length)
        {
            return false;
        }
        return trimmedLine.IndexOf("</style", startIndex, StringComparison.OrdinalIgnoreCase) >= 0;
    }
}
}