Parsing STS logs in SharePoint 2003 was fairly well documented. That was not yet the case for 2007 when this was first written. Some content about log parsing in 2007 has since appeared, but it is written in C++, so this C# version should still be useful. Given the number of libraries I've seen on the net referencing this code, I assume that is the case.
First, there are an additional 300 bytes at the head of each log file. These 300 bytes contain the text "Windows SharePoint Services HTTP log file" followed by space characters making up the difference. You can simply ignore them.
Secondly, the header of each entry appears to have grown as well, and the ordering of its fields has changed.
Header Data (50 bytes)
| Field |
Bytes |
Type |
|
Unused |
12 bytes |
? |
|
Site Url (length) |
2 bytes |
ushort |
|
Web (length) |
2 bytes |
ushort |
|
Doc (length) |
2 bytes |
ushort |
|
Unknown |
2 bytes |
? |
|
Bytes (data) |
4 bytes |
uint |
|
HTTP Status (data) |
2 bytes |
ushort |
|
Username (length) |
2 bytes |
ushort |
|
QueryString (length) |
2 bytes |
ushort |
|
Referral (length) |
2 bytes |
ushort |
|
User Agent (length) |
2 bytes |
ushort |
|
Command / Search Query (length) |
2 bytes |
ushort |
|
Unused |
14 bytes |
? |
|
Total |
50 bytes |
It also appears that the actual data has changed ordering a bit as well:
-
Site GUID
-
Timestamp
-
Site Url
-
Web
-
Doc
-
User
-
QueryString
-
Referral
-
User Agent
-
New GUID (don't know what this represents... anyone?)
-
Command / Search Query
I've created a parser in C#:
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Text;
using System.Runtime.InteropServices;
using System.ComponentModel;
namespace MOSS2007LogParser
{
/// <summary>
/// Reads a binary log file into <see cref="LogRecord"/> objects and exposes them
/// either as a <see cref="DataSet"/> or as a CSV file.
/// </summary>
/// <remarks>
/// The file is expected to start with a fixed 300-byte file header, followed by
/// records that each consist of a 50-byte record header and a variable-length body
/// whose size is derived from that header.
/// </remarks>
public class Parser
{
    private const int FILEHEADERLENGTH = 300;
    private const int RECORDHEADERLENGTH = 50;

    // Characters that force a CSV field to be quoted (RFC 4180).
    private static readonly char[] CsvSpecialCharacters = new char[] { ',', '"', '\r', '\n' };

    // Column names of the table built by GetLogDataSet, in output order.
    private static readonly string[] ColumnNames = new string[]
    {
        "TimeStamp", "SiteGUID", "SiteUrl", "Web", "Document", "UserName",
        "QueryString", "Referral", "UserAgent", "Command", "HttpStatus", "BytesSent"
    };

    private List<LogRecord> _logRecords = new List<LogRecord>();

    /// <summary>
    /// Replaces the cached record list with the records found in <paramref name="logFilePath"/>.
    /// </summary>
    /// <param name="logFilePath">Path of the binary log file to read.</param>
    /// <remarks>
    /// A record that is cut off by the end of the file is ignored and reading stops
    /// there; all complete records before it are kept.
    /// </remarks>
    private void ReadLogFile(string logFilePath)
    {
        _logRecords.Clear();

        // Open a stream to the log file
        // I recommend using a Memory-mapped file here, as the files can grow quite large.
        // http://www.winterdom.com/dev/dotnet/ shows a fairly good implementation [see: FileMap]
        using (Stream s = File.OpenRead(logFilePath))
        using (BinaryReader br = new BinaryReader(s))
        {
            // Skip the file header; its content is not needed.
            br.ReadBytes(FILEHEADERLENGTH);

            while (s.Position < s.Length)
            {
                // ReadBytes returns fewer bytes than requested at end of stream,
                // which means the trailing record header is incomplete.
                byte[] recordHeader = br.ReadBytes(RECORDHEADERLENGTH);
                if (recordHeader.Length < RECORDHEADERLENGTH)
                {
                    break;
                }

                LogRecordHeader headerData = new LogRecordHeader(recordHeader);

                // The header tells us how long the record body is.
                int recordLength = headerData.RecordLength();
                byte[] recordData = br.ReadBytes(recordLength);
                if (recordData.Length < recordLength)
                {
                    // Record body cut off by end of file.
                    break;
                }

                _logRecords.Add(new LogRecord(headerData, recordData));
            }
        }
    }

    /// <summary>
    /// Parses the log file and returns its records in a single-table <see cref="DataSet"/>.
    /// </summary>
    /// <param name="logFilePath">Path of the binary log file to read.</param>
    /// <returns>
    /// A data set named "dsSTSLogs" containing one table "dtSTSLogs" with one row per record.
    /// </returns>
    public DataSet GetLogDataSet(string logFilePath)
    {
        ReadLogFile(logFilePath);

        DataSet dsSTSLogs = new DataSet("dsSTSLogs");
        DataTable dtSTSLogs = new DataTable("dtSTSLogs");
        foreach (string columnName in ColumnNames)
        {
            dtSTSLogs.Columns.Add(new DataColumn(columnName));
        }
        dsSTSLogs.Tables.Add(dtSTSLogs);

        foreach (LogRecord record in _logRecords)
        {
            DataRow drRecord = dtSTSLogs.NewRow();
            drRecord["TimeStamp"] = record.TimeStamp;
            drRecord["SiteGUID"] = record.SiteGUID;
            drRecord["SiteUrl"] = record.SiteUrl;
            drRecord["Web"] = record.Web;
            drRecord["Document"] = record.Document;
            drRecord["UserName"] = record.UserName;
            drRecord["QueryString"] = record.QueryString;
            drRecord["Referral"] = record.Referral;
            drRecord["UserAgent"] = record.UserAgent;
            drRecord["Command"] = record.Command;
            drRecord["HttpStatus"] = record.Status;
            drRecord["BytesSent"] = record.BytesSent;
            dtSTSLogs.Rows.Add(drRecord);
        }

        return dsSTSLogs;
    }

    /// <summary>
    /// Converts the log file to a CSV file placed next to it, using the same file
    /// name with a ".csv" extension.
    /// </summary>
    /// <param name="logFilePath">Path of the binary log file to read.</param>
    public void ConvertToCSVFile(string logFilePath)
    {
        string defaultPath = Path.Combine(
            Path.GetDirectoryName(logFilePath),
            Path.GetFileNameWithoutExtension(logFilePath)) + ".csv";
        ConvertToCSVFile(logFilePath, defaultPath);
    }

    /// <summary>
    /// Converts the log file to a CSV file with one line per record and no header line.
    /// </summary>
    /// <param name="logFilePath">Path of the binary log file to read.</param>
    /// <param name="csvFilePath">Path of the CSV file to create or overwrite.</param>
    /// <remarks>
    /// Fields containing a comma, a double quote or a line break are enclosed in
    /// double quotes (embedded quotes doubled) so that such values cannot shift
    /// the columns. Other fields are written unchanged.
    /// </remarks>
    public void ConvertToCSVFile(string logFilePath, string csvFilePath)
    {
        ReadLogFile(logFilePath);

        using (StreamWriter sw = File.CreateText(csvFilePath))
        {
            foreach (LogRecord record in _logRecords)
            {
                // NOTE: BytesSent precedes Status here, unlike the DataSet column
                // order; kept as-is so existing consumers of the CSV are unaffected.
                sw.WriteLine(BuildCsvLine(
                    record.TimeStamp,
                    record.SiteGUID,
                    record.SiteUrl,
                    record.Web,
                    record.Document,
                    record.UserName,
                    record.QueryString,
                    record.Referral,
                    record.UserAgent,
                    record.Command,
                    record.BytesSent,
                    record.Status));
            }
        }
    }

    // Joins the given values into one comma-separated line, escaping each field.
    private static string BuildCsvLine(params object[] fields)
    {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < fields.Length; i++)
        {
            if (i > 0)
            {
                line.Append(',');
            }
            line.Append(EscapeCsvField(fields[i]));
        }
        return line.ToString();
    }

    // Returns the text of a value, quoted per RFC 4180 when it contains a
    // comma, double quote or line break; null becomes an empty field.
    private static string EscapeCsvField(object value)
    {
        string text = (value == null) ? string.Empty : value.ToString();
        if (text.IndexOfAny(CsvSpecialCharacters) < 0)
        {
            return text;
        }
        return "\"" + text.Replace("\"", "\"\"") + "\"";
    }
}
/// <summary>
/// Decodes the fixed-size header that precedes every log record. The header
/// carries the byte lengths of the variable-length fields in the record body
/// plus the bytes-sent and HTTP status values themselves.
/// </summary>
public class LogRecordHeader
{
    public const int STANDARD_GUID_LENGTH = 36;
    public const int STANDARD_TIMESTAMP_LENGTH = 8;

    // Byte offsets of the individual fields inside the record header.
    private const int SITEURL_OFFSET = 12;
    private const int WEB_OFFSET = 14;
    private const int DOC_OFFSET = 16;
    private const int BYTESSENT_OFFSET = 20;
    private const int HTTPSTATUS_OFFSET = 24;
    private const int USERNAME_OFFSET = 26;
    private const int QUERYSTRING_OFFSET = 28;
    private const int REFERRAL_OFFSET = 30;
    private const int USERAGENT_OFFSET = 32;
    private const int COMMAND_OFFSET = 34;

    // Smallest buffer that still contains every field read by the constructor
    // (the last field is a 2-byte value at COMMAND_OFFSET).
    private const int MINIMUM_HEADER_LENGTH = COMMAND_OFFSET + 2;

    private readonly int _siteUrlLength;
    private readonly int _webLength;
    private readonly int _docLength;
    private readonly int _userNameLength;
    private readonly long _bytesSent;
    private readonly int _httpStatus;
    private readonly int _queryStringLength;
    private readonly int _referralLength;
    private readonly int _userAgentLength;
    private readonly int _commandLength;

    /// <summary>Length in bytes of the site URL field in the record body.</summary>
    public int SiteUrlLength
    {
        get { return _siteUrlLength; }
    }

    /// <summary>Length in bytes of the web field in the record body.</summary>
    public int WebLength
    {
        get { return _webLength; }
    }

    /// <summary>Length in bytes of the document field in the record body.</summary>
    public int DocLength
    {
        get { return _docLength; }
    }

    /// <summary>Length in bytes of the user name field in the record body.</summary>
    public int UserNameLength
    {
        get { return _userNameLength; }
    }

    /// <summary>Bytes-sent value stored in the header (read as an unsigned 32-bit integer).</summary>
    public long BytesSent
    {
        get { return _bytesSent; }
    }

    /// <summary>HTTP status value stored in the header.</summary>
    public int HttpStatus
    {
        get { return _httpStatus; }
    }

    /// <summary>Length in bytes of the query string field in the record body.</summary>
    public int QueryStringLength
    {
        get { return _queryStringLength; }
    }

    /// <summary>Length in bytes of the referral field in the record body.</summary>
    public int ReferralLength
    {
        get { return _referralLength; }
    }

    /// <summary>Length in bytes of the user agent field in the record body.</summary>
    public int UserAgentLength
    {
        get { return _userAgentLength; }
    }

    /// <summary>Length in bytes of the command / search query field in the record body.</summary>
    public int CommandLength
    {
        get { return _commandLength; }
    }

    /// <summary>
    /// Decodes the header fields from the raw header bytes.
    /// </summary>
    /// <param name="recordHeader">Raw bytes of one record header.</param>
    /// <exception cref="ArgumentNullException"><paramref name="recordHeader"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="recordHeader"/> is too short to contain all header fields.
    /// </exception>
    public LogRecordHeader(byte[] recordHeader)
    {
        if (recordHeader == null)
        {
            throw new ArgumentNullException("recordHeader");
        }
        if (recordHeader.Length < MINIMUM_HEADER_LENGTH)
        {
            // Fail with a clear message instead of an opaque BitConverter error.
            throw new ArgumentException(
                "Record header is truncated: expected at least " + MINIMUM_HEADER_LENGTH +
                " bytes but got " + recordHeader.Length + ".",
                "recordHeader");
        }

        _siteUrlLength = BitConverter.ToUInt16(recordHeader, SITEURL_OFFSET);
        _webLength = BitConverter.ToUInt16(recordHeader, WEB_OFFSET);
        _docLength = BitConverter.ToUInt16(recordHeader, DOC_OFFSET);
        _bytesSent = BitConverter.ToUInt32(recordHeader, BYTESSENT_OFFSET);
        _httpStatus = BitConverter.ToUInt16(recordHeader, HTTPSTATUS_OFFSET);
        _userNameLength = BitConverter.ToUInt16(recordHeader, USERNAME_OFFSET);
        _queryStringLength = BitConverter.ToUInt16(recordHeader, QUERYSTRING_OFFSET);
        _referralLength = BitConverter.ToUInt16(recordHeader, REFERRAL_OFFSET);
        _userAgentLength = BitConverter.ToUInt16(recordHeader, USERAGENT_OFFSET);
        _commandLength = BitConverter.ToUInt16(recordHeader, COMMAND_OFFSET);
    }

    /// <summary>
    /// Computes the total length in bytes of the record body that follows this header.
    /// </summary>
    /// <returns>
    /// The sum of all field lengths, where every field (including the two
    /// fixed-length GUIDs and the timestamp) is counted with one extra trailing byte.
    /// </returns>
    public int RecordLength()
    {
        return STANDARD_GUID_LENGTH + 1 +
               STANDARD_TIMESTAMP_LENGTH + 1 +
               SiteUrlLength + 1 +
               WebLength + 1 +
               DocLength + 1 +
               UserNameLength + 1 +
               QueryStringLength + 1 +
               ReferralLength + 1 +
               UserAgentLength + 1 +
               STANDARD_GUID_LENGTH + 1 +
               CommandLength + 1;
    }
}
/// <summary>
/// A single decoded log entry. The text fields are extracted from the record
/// body; the HTTP status and bytes-sent values are copied from the record header.
/// </summary>
public class LogRecord
{
    private readonly string _siteGuid;
    private readonly string _timeStamp;
    private readonly string _siteUrl;
    private readonly string _web;
    private readonly string _document;
    private readonly string _userName;
    private readonly string _queryString;
    private readonly string _referral;
    private readonly string _userAgent;
    private readonly string _command;
    private readonly int _status;
    private readonly long _bytesSent;

    public string SiteGUID { get { return _siteGuid; } }

    public string TimeStamp { get { return _timeStamp; } }

    public string SiteUrl { get { return _siteUrl; } }

    public string Web { get { return _web; } }

    public string Document { get { return _document; } }

    public string UserName { get { return _userName; } }

    public string QueryString { get { return _queryString; } }

    public string Referral { get { return _referral; } }

    public string UserAgent { get { return _userAgent; } }

    public string Command { get { return _command; } }

    public int Status { get { return _status; } }

    public long BytesSent { get { return _bytesSent; } }

    /// <summary>
    /// Builds a record from its decoded header and the raw bytes of its body.
    /// </summary>
    /// <param name="headerData">Header describing the field lengths of this record.</param>
    /// <param name="recordData">Raw bytes of the record body.</param>
    public LogRecord(LogRecordHeader headerData, byte[] recordData)
    {
        // Pick up all variable field lengths from the header first.
        int siteUrlLength = headerData.SiteUrlLength;
        int webLength = headerData.WebLength;
        int docLength = headerData.DocLength;
        int userNameLength = headerData.UserNameLength;
        int queryStringLength = headerData.QueryStringLength;
        int referralLength = headerData.ReferralLength;
        int userAgentLength = headerData.UserAgentLength;
        int commandLength = headerData.CommandLength;

        // Walk the body front to back; each field is followed by one extra byte.
        int cursor = 0;
        _siteGuid = ReadField(recordData, ref cursor, LogRecordHeader.STANDARD_GUID_LENGTH);
        _timeStamp = ReadField(recordData, ref cursor, LogRecordHeader.STANDARD_TIMESTAMP_LENGTH);
        _siteUrl = ReadField(recordData, ref cursor, siteUrlLength);
        _web = ReadField(recordData, ref cursor, webLength);
        _document = ReadField(recordData, ref cursor, docLength);
        _userName = ReadField(recordData, ref cursor, userNameLength);
        _queryString = ReadField(recordData, ref cursor, queryStringLength);
        _referral = ReadField(recordData, ref cursor, referralLength);
        _userAgent = ReadField(recordData, ref cursor, userAgentLength);

        // A second GUID sits between the user agent and the command; it is not exposed.
        cursor += LogRecordHeader.STANDARD_GUID_LENGTH + 1;
        _command = ReadField(recordData, ref cursor, commandLength);

        _status = headerData.HttpStatus;
        _bytesSent = headerData.BytesSent;
    }

    // Decodes `length` bytes at `cursor` and moves the cursor past the field and
    // the single byte that follows it. Uses Encoding.Default, i.e. the platform
    // default encoding rather than UTF-8 specifically.
    private static string ReadField(byte[] data, ref int cursor, int length)
    {
        string text = Encoding.Default.GetString(data, cursor, length);
        cursor += length + 1;
        return text;
    }
}
}