Last active
November 12, 2016 02:08
-
-
Save seankearney/6941029 to your computer and use it in GitHub Desktop.
Simple Sitecore/ASP.NET page to check which IFilters are known to the Sitecore system. It will also attempt to locate a media library item of each type and read text out of that file. This should help troubleshoot why some content inside media library items is not being indexed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<%@ Page Language="C#" %> | |
<script runat="server"> | |
/// <summary>
/// Binds the IFilter registration table for every extension listed below, then
/// smoke-tests Sitecore's FilterReader by writing the first few characters it
/// reads from a sample Word document to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // These are the extensions that Sitecore 7 supports indexing content for
    // See: Sitecore.ContentSearch.ComputedFields.MediaItemContentExtractor
    string[] extensions = new[]
    {
        ".pdf", ".html", ".rtf", ".odt",
        ".doc", ".dot", ".docx", ".dotx", ".docm", ".dotm",
        ".xls", ".xlt", ".xla", ".xlsx", ".xltx", ".xlsm", ".xltm", ".xlam", ".xlsb",
        ".ppt", ".pot", ".pps", ".ppa", ".pptx", ".potx", ".ppsx", ".ppam", ".pptm", ".potm", ".ppsm"
    };

    System.Collections.Generic.List<IFilterRegistration> registrations = GetRegistrations(extensions).ToList();
    InstalledFilters.DataSource = registrations;
    InstalledFilters.DataBind();

    // Try the FilterReader class: read a few characters from a sample document.
    const string sampleVirtualPath = "~/temp/Word-2010.docx";
    const int sampleLength = 5;

    string samplePath = Server.MapPath(sampleVirtualPath);
    if (!System.IO.File.Exists(samplePath))
    {
        // The sample document is optional; report its absence instead of
        // failing the whole page (the table above has already been bound).
        Response.Write(Server.HtmlEncode("Sample file not found: " + sampleVirtualPath));
        return;
    }

    char[] chars = new char[sampleLength];
    int charsRead;
    using (Sitecore.ContentSearch.Extracters.IFilterTextExtraction.FilterReader reader = new Sitecore.ContentSearch.Extracters.IFilterTextExtraction.FilterReader(samplePath))
    {
        charsRead = reader.Read(chars, 0, chars.Length);
    }

    // Emit only the characters actually read (the buffer's unread tail is '\0'),
    // and HTML-encode because the text comes from an arbitrary document.
    Response.Write(Server.HtmlEncode(new string(chars, 0, System.Math.Max(charsRead, 0))));
}
/// <summary>
/// For each extension, asks Sitecore's non-public FilterLoader (via reflection)
/// which DLL and persist class back its IFilter, loads that filter, and runs
/// MediaItemIFilterTextExtractor against a sample item found through
/// <c>GetMediaItem</c>.
/// </summary>
/// <param name="extentions">File extensions to probe, including the leading dot.</param>
/// <returns>
/// One <c>IFilterRegistration</c> per extension, produced lazily as the
/// sequence is enumerated.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The FilterLoader type or one of its private methods cannot be found, or no
/// IFilter could be loaded for an extension.
/// </exception>
protected System.Collections.Generic.IEnumerable<IFilterRegistration> GetRegistrations(params string[] extentions)
{
    if (extentions == null)
    {
        yield break;
    }

    const System.Reflection.BindingFlags privateStatic =
        System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static;

    System.Type filterLoader = System.Type.GetType("Sitecore.ContentSearch.Extracters.IFilterTextExtraction.FilterLoader, Sitecore.ContentSearch");
    if (filterLoader == null)
    {
        throw new InvalidOperationException("Type Sitecore.ContentSearch.Extracters.IFilterTextExtraction.FilterLoader could not be loaded from Sitecore.ContentSearch.");
    }

    // private static bool GetFilterDllAndClass(string ext, out string dllName, out string filterPersistClass)
    System.Reflection.MethodInfo dynMethod = filterLoader.GetMethod("GetFilterDllAndClass", privateStatic);
    // Loop-invariant, so resolved once instead of once per extension.
    System.Reflection.MethodInfo loadFilterFromDllMethod = filterLoader.GetMethod("LoadFilterFromDll", privateStatic);
    if (dynMethod == null || loadFilterFromDllMethod == null)
    {
        throw new InvalidOperationException("FilterLoader does not expose the expected private static methods GetFilterDllAndClass / LoadFilterFromDll.");
    }

    foreach (string extension in extentions)
    {
        // Reflection writes 'out' results back into this array (slots 1 and 2),
        // NOT into any locals used to build it - so always read from args.
        object[] args = new object[] { extension, null, null };
        dynMethod.Invoke(null, args);
        string dllName = args[1] as string;
        string filterPersistClass = args[2] as string;

        Sitecore.ContentSearch.Extracters.IFilterTextExtraction.IFilter ifilter = loadFilterFromDllMethod.Invoke(null, new object[] { dllName, filterPersistClass }) as Sitecore.ContentSearch.Extracters.IFilterTextExtraction.IFilter;
        if (ifilter == null)
        {
            throw new InvalidOperationException(string.Format(
                "ifilter == null for extension '{0}' (dll: '{1}', class: '{2}')",
                extension, dllName ?? "", filterPersistClass ?? ""));
        }

        var extractor = new Sitecore.ContentSearch.ComputedFields.MediaItemIFilterTextExtractor();

        Sitecore.Data.Items.Item item = GetMediaItem(extension);
        string content = "";
        string sampleItemPath = "Item not found.";
        if (item != null)
        {
            sampleItemPath = item.Paths.Path;
            var indexableItem = (Sitecore.ContentSearch.SitecoreIndexableItem)item;
            try
            {
                object value = extractor.ComputeFieldValue(indexableItem);
                content = (value ?? "").ToString();
            }
            catch (Exception e)
            {
                // Extraction failures are reported in the table rather than
                // aborting the remaining extensions.
                content = string.Concat("Exception trying to execute: ", e.Message);
            }
        }

        yield return new IFilterRegistration
        {
            Extension = extension,
            DLL = dllName ?? "",
            FilterPersistClass = filterPersistClass ?? "",
            SampleItem = sampleItemPath,
            SampleContent = content
        };
    }
}
/// <summary>
/// Searches the "sitecore_master_index" index for an item whose "Extension"
/// field equals the given extension (compared case-insensitively, leading
/// dots removed) and returns the first hit.
/// </summary>
/// <param name="extension">File extension, with or without a leading dot.</param>
/// <returns>The first matching item, or <c>null</c> when the index has no match.</returns>
protected Sitecore.Data.Items.Item GetMediaItem(string extension)
{
    extension = extension.TrimStart('.');

    var masterIndex = Sitecore.ContentSearch.ContentSearchManager.GetIndex("sitecore_master_index");
    using (var searchContext = masterIndex.CreateSearchContext())
    {
        // No path restriction is applied here: the first index entry with a
        // matching "Extension" field wins.
        var matches = searchContext
            .GetQueryable<Sitecore.ContentSearch.SearchTypes.SearchResultItem>()
            .Where(i => string.Equals(i["Extension"], extension, StringComparison.OrdinalIgnoreCase));

        Sitecore.ContentSearch.SearchTypes.SearchResultItem firstHit = matches.FirstOrDefault();
        return firstHit == null ? null : firstHit.GetItem();
    }
}
/// <summary>
/// One row of the diagnostics table: what was discovered for a single file extension.
/// </summary>
protected class IFilterRegistration
{
    /// <summary>The file extension that was probed.</summary>
    public string Extension { get; set; }

    /// <summary>The DLL name reported for the extension's IFilter.</summary>
    public string DLL { get; set; }

    /// <summary>The filter persist class reported for the extension's IFilter.</summary>
    public string FilterPersistClass { get; set; }

    /// <summary>Path of the sample item used for extraction, or a placeholder message.</summary>
    public string SampleItem { get; set; }

    /// <summary>Text extracted from the sample item, or an error message.</summary>
    public string SampleContent { get; set; }
}
</script> | |
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title></title>
</head>
<body>
    <form id="form1" runat="server">
        <div>
            <%-- All dynamic values are HTML-encoded: SampleContent in particular is
                 text extracted from arbitrary documents and may contain markup. --%>
            ContentSearchConfigurationSettings.MediaIndexingFolder = <%= Server.HtmlEncode(System.Convert.ToString(Sitecore.ContentSearch.Utilities.ContentSearchConfigurationSettings.MediaIndexingFolder)) %>
            <h1>Known IFilters</h1>
            <table border="1">
                <asp:Repeater ID="InstalledFilters" runat="server">
                    <HeaderTemplate>
                        <tr>
                            <th>Extension</th>
                            <th>DLL</th>
                            <th>FilterPersistClass</th>
                            <th>SampleItem</th>
                            <th>SampleContent</th>
                        </tr>
                    </HeaderTemplate>
                    <ItemTemplate>
                        <tr>
                            <td><%# Server.HtmlEncode(System.Convert.ToString(Eval("Extension"))) %></td>
                            <td><%# Server.HtmlEncode(System.Convert.ToString(Eval("DLL"))) %></td>
                            <td><%# Server.HtmlEncode(System.Convert.ToString(Eval("FilterPersistClass"))) %></td>
                            <td><%# Server.HtmlEncode(System.Convert.ToString(Eval("SampleItem"))) %></td>
                            <td><%# Server.HtmlEncode(System.Convert.ToString(Eval("SampleContent"))) %></td>
                        </tr>
                    </ItemTemplate>
                </asp:Repeater>
            </table>
        </div>
    </form>
</body>
</html>
You need to break up the gist a little: make an aspx page that inherits a class with the contents of the <script> tag as its body. Put the class in a project that compiles with the assembly name "Sitecore.ContentSearch.UnitTests". This way, the internal types will be visible to your project.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How are you instantiating Sitecore.ContentSearch.Extracters.IFilterTextExtraction.IFilter? This is a protected class in 7.5, also in 7.1. Thanks!