Last active
November 13, 2017 08:01
-
-
Save blacktambourine/1c3f8b43800fdfc43fdc68bcc9293960 to your computer and use it in GitHub Desktop.
Custom item crawler for Lucene that indexes JSON data stored on Sitecore items.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.Linq; | |
using System.Threading; | |
using Newtonsoft.Json; | |
//using CustomModel.Flights.Model; | |
using Sitecore.Collections; | |
using Sitecore.ContentSearch; | |
using Sitecore.ContentSearch.Abstractions; | |
using Sitecore.ContentSearch.Diagnostics; | |
using Sitecore.ContentSearch.Pipelines.GetContextIndex; | |
using Sitecore.Data; | |
using Sitecore.Data.Items; | |
using Sitecore.Data.Managers; | |
using Sitecore.Diagnostics; | |
using Sitecore.Globalization; | |
using Sitecore.SecurityModel; | |
using Version = Sitecore.Data.Version; | |
namespace Business.Search.Json | |
{ | |
public class FidsJsonItemCrawler : HierarchicalDataCrawler<SitecoreIndexableItem>, IContextIndexRankable | |
{ | |
// Name of the Sitecore database to crawl; backing field for the Database property.
private string database; | |
// Identifier string of the crawl root as handed to Database.GetItem; backing field for Root.
// When empty, the Root getter fills it with the ID of the database's root item.
private string root; | |
// Cached root item; resolved on demand by GetRootItem() and cleared by the Root setter.
private Item rootItem; | |
// Non-zero once the "Root item could not be found" error has been written to the logs.
private volatile int rootItemErrorLogged; | |
/// <summary>
/// Creates a crawler without index operations; <see cref="Initialize"/> takes them
/// from the index when none have been supplied.
/// </summary>
public FidsJsonItemCrawler()
    : base()
{
}

/// <summary>
/// Creates a crawler that uses the supplied index operations.
/// </summary>
/// <param name="indexOperations">Operations forwarded to the base crawler.</param>
public FidsJsonItemCrawler(IIndexOperations indexOperations)
    : base(indexOperations)
{
}
/// <summary>
/// Gets or sets the name of the database this crawler works against.
/// The getter returns <c>null</c> when the stored name is null or empty.
/// </summary>
public string Database
{
    get { return string.IsNullOrEmpty(this.database) ? null : this.database; }
    set { this.database = value; }
}
/// <summary>
/// Gets or sets the identifier of the crawl root.
/// When no root has been configured, the getter resolves the configured database,
/// asserts that it exists, and falls back to the ID of that database's root item.
/// Setting a new value discards the cached root item.
/// </summary>
public string Root
{
    get
    {
        if (!string.IsNullOrEmpty(this.root))
        {
            return this.root;
        }

        IFactory factory = ContentSearchManager.Locator.GetInstance<IFactory>();
        Sitecore.Data.Database db = factory.GetDatabase(this.database);
        Assert.IsNotNull((object)db, "Database " + this.database + " does not exist");

        // The fallback lookup runs with security checks switched off.
        using (new SecurityDisabler())
        {
            Item databaseRoot = db.GetRootItem();
            this.root = databaseRoot.ID.ToString();
        }

        return this.root;
    }

    set
    {
        this.root = value;
        this.rootItem = null;
    }
}
/// <summary>
/// Gets the item the crawl starts from, resolving it through <c>GetRootItem()</c>.
/// </summary>
/// <exception cref="InvalidOperationException">The root item could not be resolved.</exception>
public Item RootItem
{
    get
    {
        this.rootItem = this.GetRootItem();
        if (this.rootItem != null)
        {
            return this.rootItem;
        }

        object indexName = this.index != null ? (object)this.index.Name : (object)"NULL";
        string failure = string.Format(
            "[Index={0}, Crawler={1}, Database={2}] Root item could not be found: {3}.",
            indexName,
            (object)typeof(SitecoreItemCrawler).Name,
            (object)this.database,
            (object)this.root);
        throw new InvalidOperationException(failure);
    }
}
/// <summary>
/// Returns the cached root item, loading it from the configured database on first use.
/// Unlike <see cref="RootItem"/>, this does not throw when the item is missing: it
/// returns <c>null</c> and writes an error to the crawling log and the Sitecore log.
/// </summary>
/// <returns>The root item, or <c>null</c> when it cannot be found.</returns>
private Item GetRootItem()
{
    if (this.rootItem != null)
    {
        return this.rootItem;
    }

    Sitecore.Data.Database db = ContentSearchManager.Locator.GetInstance<IFactory>().GetDatabase(this.database);
    Assert.IsNotNull((object)db, "Database " + this.database + " does not exist");

    using (new SecurityDisabler())
    {
        this.rootItem = db.GetItem(this.Root);

        // Flip the flag 0 -> 1 atomically so that exactly one caller reports the
        // problem. The previous "test for 0, then Increment" sequence let
        // concurrent callers all pass the test and log the same error repeatedly.
        if (this.rootItem == null
            && Interlocked.CompareExchange(ref this.rootItemErrorLogged, 1, 0) == 0)
        {
            string message = string.Format(
                "[Index={0}, Crawler={1}, Database={2}] Root item could not be found: {3}.",
                this.index != null ? (object)this.index.Name : (object)"NULL",
                (object)typeof(SitecoreItemCrawler).Name,
                (object)this.database,
                (object)this.root);
            CrawlingLog.Log.Error(message, (Exception)null);
            Log.Error(message, (object)this);
        }
    }

    return this.rootItem;
}
/// <summary>
/// Validates the crawler configuration and attaches the crawler to an index.
/// If no index operations were supplied at construction, the index's own are adopted
/// and an informational line is written to the crawling log.
/// </summary>
/// <param name="index">The index this crawler feeds; must not be null.</param>
public override void Initialize(ISearchIndex index)
{
    Assert.ArgumentNotNull((object)index, nameof(index));
    Assert.IsNotNull((object)this.Database, "Database element not set.");
    Assert.IsNotNull((object)this.Root, "Root element not set.");

    bool needsOperations = this.Operations == null;
    if (needsOperations)
    {
        this.Operations = index.Operations;

        string banner = string.Format(
            "[Index={0}] Initializing {3}. DB:{1} / Root:{2}",
            (object)index.Name,
            (object)this.Database,
            (object)this.Root,
            (object)typeof(SitecoreItemCrawler).Name);
        CrawlingLog.Log.Info(banner, (Exception)null);
    }

    base.Initialize(index);
}
/// <summary>
/// Ranks this crawler for an indexable by how many tree levels separate the item
/// from the crawl root (item level minus root level).
/// </summary>
/// <param name="indexable">The indexable to rank.</param>
/// <returns>
/// The level difference, or <see cref="int.MaxValue"/> when the indexable is not a
/// <c>SitecoreIndexableItem</c> or the root item cannot be resolved.
/// </returns>
public virtual int GetContextIndexRanking(IIndexable indexable)
{
    var candidate = indexable as SitecoreIndexableItem;
    if (candidate == null)
    {
        return int.MaxValue;
    }

    if (this.GetRootItem() == null)
    {
        return int.MaxValue;
    }

    Item item = (Item)candidate;
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        int itemLevel = item.Axes.Level;
        int rootLevel = this.RootItem.Axes.Level;
        return itemLevel - rootLevel;
    }
}
/// <summary>
/// Decides whether an indexable is excluded from the index, including the
/// location check against the crawl root.
/// </summary>
/// <param name="indexable">The indexable to test; cast to <c>SitecoreIndexableItem</c>.</param>
/// <returns><c>true</c> when the indexable must not be indexed.</returns>
public override bool IsExcludedFromIndex(IIndexable indexable)
    => this.IsExcludedFromIndex((SitecoreIndexableItem)indexable, true);
/// <summary>
/// Decides whether an item is excluded from the index. An item is excluded when it
/// belongs to another database, when (optionally) it is not located under the crawl
/// root, or when the template include/exclude lists of the document options rule it
/// out. Every exclusion except an unresolved root raises the
/// "indexing:excludedfromindex" event.
/// </summary>
/// <param name="indexable">The item to test.</param>
/// <param name="checkLocation">When true, also require the item to sit under the crawl root.</param>
/// <returns><c>true</c> when the item must not be indexed.</returns>
protected override bool IsExcludedFromIndex(SitecoreIndexableItem indexable, bool checkLocation = false)
{
    Item item = (Item)indexable;
    Assert.ArgumentNotNull(item, "item");
    IDocumentBuilderOptions documentOptions = this.DocumentOptions;
    Assert.IsNotNull(documentOptions, "DocumentOptions");

    if (!item.Database.Name.Equals(this.Database, StringComparison.InvariantCultureIgnoreCase))
    {
        this.RaiseExcludedFromIndex(item);
        return true;
    }

    if (checkLocation)
    {
        // An unresolved root excludes everything, silently (no event is raised).
        if (this.GetRootItem() == null)
        {
            return true;
        }

        if (!this.IsAncestorOf(item))
        {
            this.RaiseExcludedFromIndex(item);
            return true;
        }
    }

    string templateId = item.TemplateID.ToString();
    bool excluded;
    if (documentOptions.HasIncludedTemplates)
    {
        // The include list wins. The original code repeated this whole branch a
        // second time; that copy could never execute and has been removed.
        if (documentOptions.HasExcludedTemplates)
        {
            CrawlingLog.Log.Warn("You have specified both IncludeTemplates and ExcludeTemplates. This logic is not supported. Exclude templates will be ignored.", (Exception)null);
        }

        excluded = !documentOptions.IncludedTemplates.Contains(templateId);
    }
    else
    {
        excluded = documentOptions.ExcludedTemplates.Contains(templateId);
    }

    if (excluded)
    {
        this.RaiseExcludedFromIndex(item);
    }

    return excluded;
}

/// <summary>Raises the "indexing:excludedfromindex" event for the given item.</summary>
private void RaiseExcludedFromIndex(Item item)
{
    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:excludedfromindex", (object)this.index.Name, (object)item.Uri);
}
/// <summary>
/// Tests whether the crawl root is an ancestor of the given item. The check runs
/// with security and Sitecore caches disabled.
/// </summary>
/// <param name="item">The item whose location is tested.</param>
/// <returns><c>true</c> when the root item's axes report it as an ancestor of <paramref name="item"/>.</returns>
protected virtual bool IsAncestorOf(Item item)
{
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        Item crawlRoot = this.RootItem;
        if (crawlRoot == null)
        {
            return false;
        }

        return crawlRoot.Axes.IsAncestorOf(item);
    }
}
/// <summary>
/// Excludes a unique id when the database named in its item URI differs
/// (case-insensitively) from this crawler's database.
/// </summary>
/// <param name="indexableUniqueId">The unique id to test; treated as a <c>SitecoreItemUniqueId</c>.</param>
/// <returns><c>true</c> when the id belongs to another database.</returns>
protected override bool IsExcludedFromIndex(IIndexableUniqueId indexableUniqueId)
{
    ItemUri uri = (ItemUri)(indexableUniqueId as SitecoreItemUniqueId);
    bool sameDatabase = uri.DatabaseName.Equals(this.Database, StringComparison.InvariantCultureIgnoreCase);
    return !sameDatabase;
}
/// <summary>
/// Adds an item to the index. Instead of indexing the Sitecore item itself, the JSON
/// stored in its "RawJson" field is deserialized into a <c>FidsResponse</c> and each
/// airline entry is added to the index as its own <c>JsonIndexableItem</c>.
/// Raises "indexing:adding" before and "indexing:added" after the work.
/// </summary>
/// <param name="context">The update context of the index being written; must not be null.</param>
/// <param name="indexable">The Sitecore item carrying the JSON payload; must not be null.</param>
protected override void DoAdd(IProviderUpdateContext context, SitecoreIndexableItem indexable)
{
    Assert.ArgumentNotNull((object)context, nameof(context));
    Assert.ArgumentNotNull((object)indexable, nameof(indexable));
    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:adding", (object)context.Index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);

    // Built once for all languages. The format previously read "mm:hh:ss", i.e.
    // minutes and 12-hour hours in swapped positions; "HH:mm:ss" is the 24-hour
    // hours:minutes:seconds layout of an ISO-8601 timestamp.
    var dateFormat = new Newtonsoft.Json.Converters.IsoDateTimeConverter();
    dateFormat.DateTimeFormat = "yyyy-MM-ddTHH:mm:ss";

    foreach (Language language in indexable.Item.Languages)
    {
        Item latestVersion;
        using (new SitecoreCachesDisabler())
        {
            latestVersion = indexable.Item.Database.GetItem(indexable.Item.ID, language, Version.Latest);
        }

        if (latestVersion == null)
        {
            CrawlingLog.Log.Warn(string.Format("SitecoreItemCrawler : AddItem : Could not build document data {0} - Latest version could not be found. Skipping.", (object)indexable.Item.Uri), (Exception)null);
            continue;
        }

        Item currentVersion;
        using (new SitecoreCachesDisabler())
        {
            currentVersion = latestVersion.Versions.GetVersions(false).FirstOrDefault(); //should only be one version
        }

        // A language without usable JSON is skipped on its own. The earlier "return"
        // abandoned all remaining languages and never raised "indexing:added".
        if (currentVersion == null)
        {
            continue;
        }

        // Guard against the field being absent from the item's template.
        var rawJsonField = currentVersion.Fields["RawJson"];
        string json = rawJsonField != null ? rawJsonField.Value : null;
        if (string.IsNullOrEmpty(json))
        {
            continue;
        }

        //FidsResponse is a custom class to map the Json to
        var allFlights = JsonConvert.DeserializeObject<FidsResponse>(json, dateFormat);
        if (allFlights == null || allFlights.Airlines == null || !allFlights.Airlines.Any())
        {
            continue;
        }

        // Remove the existing entry once, before re-adding the airlines; the
        // delete does not depend on the airline and was repeated per iteration.
        this.Operations.Delete((IIndexable)indexable, context);

        foreach (var airline in allFlights.Airlines)
        {
            var jsonIndexableItem = (JsonIndexableItem)airline;
            jsonIndexableItem.IndexFieldStorageValueFormatter = context.Index.Configuration.IndexFieldStorageValueFormatter;
            this.Operations.Add((IIndexable)jsonIndexableItem, context, this.index.Configuration); //add or re-add the item
        }
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:added", (object)context.Index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
}
/// <summary>
/// Updates an item in the index. When <c>IndexUpdateNeedDelete</c> asks for it the
/// item is deleted instead. Otherwise the JSON stored in the item's "RawJson" field
/// is deserialized into a <c>FidsResponse</c> and every airline entry is updated in
/// the index as its own <c>JsonIndexableItem</c>; afterwards "indexing:updateditem"
/// is raised and, if the document options request it, dependents are updated.
/// </summary>
/// <param name="context">The update context of the index being written; must not be null.</param>
/// <param name="indexable">The Sitecore item carrying the JSON payload; must not be null.</param>
protected override void DoUpdate(IProviderUpdateContext context, SitecoreIndexableItem indexable)
{
    Assert.ArgumentNotNull((object)context, nameof(context));
    Assert.ArgumentNotNull((object)indexable, nameof(indexable));

    if (this.IndexUpdateNeedDelete(indexable))
    {
        this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:deleteitem", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
        this.Operations.Delete((IIndexable)indexable, context);
        return;
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:updatingitem", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);

    // Built once for all languages. The format previously read "mm:hh:ss", i.e.
    // minutes and 12-hour hours in swapped positions; "HH:mm:ss" is the 24-hour
    // hours:minutes:seconds layout of an ISO-8601 timestamp.
    var dateFormat = new Newtonsoft.Json.Converters.IsoDateTimeConverter();
    dateFormat.DateTimeFormat = "yyyy-MM-ddTHH:mm:ss";

    foreach (Language language in indexable.Item.Languages)
    {
        Item latestVersion;
        using (new SitecoreCachesDisabler())
        {
            latestVersion = indexable.Item.Database.GetItem(indexable.Item.ID, language, Sitecore.Data.Version.Latest);
        }

        if (latestVersion == null)
        {
            CrawlingLog.Log.Warn(string.Format("SitecoreItemCrawler : Update : Latest version not found for item {0}. Skipping.", (object)indexable.Item.Uri), (Exception)null);
            continue;
        }

        Item currentVersion;
        using (new SitecoreCachesDisabler())
        {
            currentVersion = latestVersion.Versions.GetVersions(false).FirstOrDefault(); //should only be one version
        }

        // A language without usable JSON is skipped on its own. The earlier "return"
        // abandoned the remaining languages and also skipped the
        // "indexing:updateditem" event and the dependency update below.
        if (currentVersion == null)
        {
            continue;
        }

        // Guard against the field being absent from the item's template.
        var rawJsonField = currentVersion.Fields["RawJson"];
        string json = rawJsonField != null ? rawJsonField.Value : null;
        if (string.IsNullOrEmpty(json))
        {
            continue;
        }

        var allFlights = JsonConvert.DeserializeObject<FidsResponse>(json, dateFormat);
        if (allFlights == null || allFlights.Airlines == null || !allFlights.Airlines.Any())
        {
            continue;
        }

        foreach (var airline in allFlights.Airlines)
        {
            var jsonIndexableItem = (JsonIndexableItem)airline;
            jsonIndexableItem.IndexFieldStorageValueFormatter = context.Index.Configuration.IndexFieldStorageValueFormatter;
            this.Operations.Update((IIndexable)jsonIndexableItem, context, this.index.Configuration);
        }
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:updateditem", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
    if (!this.DocumentOptions.ProcessDependencies)
    {
        return;
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:updatedependents", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
    this.UpdateDependents(context, indexable);
}
/// <summary>
/// Intentionally empty: clone handling is not applicable to this crawler.
/// </summary>
/// <param name="context">Unused.</param>
/// <param name="versionIndexable">Unused.</param>
private void UpdateClones(IProviderUpdateContext context, SitecoreIndexableItem versionIndexable)
{
    // not applicable
}
/// <summary>
/// Wraps an item version as an indexable, recording whether it is the latest version
/// and attaching the storage value formatter of the context's index configuration.
/// </summary>
/// <param name="item">The item version to wrap.</param>
/// <param name="context">The update context supplying the index configuration.</param>
/// <returns>The prepared indexable.</returns>
internal SitecoreIndexableItem PrepareIndexableVersion(Item item, IProviderUpdateContext context)
{
    var prepared = (SitecoreIndexableItem)item;

    var builtinFields = (IIndexableBuiltinFields)prepared;
    builtinFields.IsLatestVersion = item.Versions.IsLatestVersion();

    var configuration = context.Index.Configuration;
    prepared.IndexFieldStorageValueFormatter = configuration.IndexFieldStorageValueFormatter;

    return prepared;
}
/// <summary>
/// Loads the item addressed by a unique id, with security and Sitecore caches disabled.
/// </summary>
/// <param name="indexableUniqueId">The unique id; treated as a <c>SitecoreItemUniqueId</c>.</param>
/// <returns>The item wrapped as a <c>SitecoreIndexableItem</c>.</returns>
protected override SitecoreIndexableItem GetIndexable(IIndexableUniqueId indexableUniqueId)
{
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        ItemUri uri = (ItemUri)(indexableUniqueId as SitecoreItemUniqueId);
        Item found = Sitecore.Data.Database.GetItem(uri);
        return (SitecoreIndexableItem)found;
    }
}
/// <summary>
/// Reports whether the group for an id should be deleted, which is the case when the
/// item can no longer be found in the crawler's database.
/// </summary>
/// <param name="indexableId">The id to look up; must not be null.</param>
/// <returns>
/// <c>true</c> when no item with that id exists; <c>false</c> when it exists or the id
/// is not a <c>SitecoreItemId</c>.
/// </returns>
protected override bool GroupShouldBeDeleted(IIndexableId indexableId)
{
    Assert.ArgumentNotNull((object)indexableId, nameof(indexableId));

    var itemId = indexableId as SitecoreItemId;
    if (itemId == null)
    {
        return false;
    }

    Sitecore.Data.Database db = this.Index.Locator.GetInstance<IFactory>().GetDatabase(this.Database);
    Item existing;
    using (new SitecoreCachesDisabler())
    {
        existing = db.GetItem((ID)itemId);
    }

    return existing == null;
}
/// <summary>
/// Loads the item addressed by a unique id and verifies that the requested version
/// number is still among the version numbers of the item's latest version. When it is
/// not, the item is treated as missing.
/// </summary>
/// <param name="indexableUniqueId">The unique id; treated as a <c>SitecoreItemUniqueId</c>.</param>
/// <returns>The item wrapped as a <c>SitecoreIndexableItem</c>, built from a null item when the version is gone.</returns>
protected override SitecoreIndexableItem GetIndexableAndCheckDeletes(IIndexableUniqueId indexableUniqueId)
{
    ItemUri itemUri = (ItemUri)(indexableUniqueId as SitecoreItemUniqueId);

    using (new SecurityDisabler())
    {
        Item requested;
        using (new SitecoreCachesDisabler())
        {
            requested = Sitecore.Data.Database.GetItem(itemUri);
        }

        if (requested != null)
        {
            var latestUri = new ItemUri(itemUri.ItemID, itemUri.Language, Version.Latest, itemUri.DatabaseName);
            Item latest = Sitecore.Data.Database.GetItem(latestUri);

            Version[] knownVersions;
            using (new SitecoreCachesDisabler())
            {
                knownVersions = latest.Versions.GetVersionNumbers() ?? new Version[0];
            }

            bool versionStillExists = knownVersions.Any(v => v.Number == itemUri.Version.Number);
            if (!versionStillExists)
            {
                requested = null;
            }
        }

        return (SitecoreIndexableItem)requested;
    }
}
/// <summary>
/// Always answers <c>false</c>: an update handled by this crawler never turns into a delete.
/// </summary>
/// <param name="indexable">Ignored.</param>
protected override bool IndexUpdateNeedDelete(SitecoreIndexableItem indexable) => false;
/// <summary>
/// After a version is deleted, yields the unique id of the item's latest version
/// when that version's number is lower than the deleted one.
/// </summary>
/// <param name="indexableUniqueId">Unique id of the deleted version; its value is read as an <c>ItemUri</c>.</param>
/// <returns>Zero or one unique id to update.</returns>
protected override IEnumerable<IIndexableUniqueId> GetIndexablesToUpdateOnDelete(IIndexableUniqueId indexableUniqueId)
{
    var deletedUri = indexableUniqueId.Value as ItemUri;

    using (new SecurityDisabler())
    {
        var latestUri = new ItemUri(deletedUri.ItemID, deletedUri.Language, Version.Latest, deletedUri.DatabaseName);

        Item latestItem;
        using (new SitecoreCachesDisabler())
        {
            latestItem = Sitecore.Data.Database.GetItem(latestUri);
        }

        bool latestIsOlder = latestItem != null && latestItem.Version.Number < deletedUri.Version.Number;
        if (latestIsOlder)
        {
            yield return (IIndexableUniqueId)new SitecoreItemUniqueId(latestItem.Uri);
        }
    }
}
/// <summary>
/// Returns the crawl root wrapped as an indexable; the root is read with security disabled.
/// </summary>
public override SitecoreIndexableItem GetIndexableRoot()
{
    using (new SecurityDisabler())
    {
        Item crawlRoot = this.RootItem;
        return (SitecoreIndexableItem)crawlRoot;
    }
}
/// <summary>
/// Lists the ids of a parent's children as returned by <c>GetChildList</c>.
/// </summary>
/// <param name="parent">The parent indexable.</param>
/// <returns>The child ids, or <c>null</c> when the parent has no children.</returns>
protected override IEnumerable<IIndexableId> GetIndexableChildrenIds(SitecoreIndexableItem parent)
{
    ChildList children = this.GetChildList(parent.Item);
    if (children.Count != 0)
    {
        IEnumerable<Item> items = (IEnumerable<Item>)children;
        return (IEnumerable<IIndexableId>)items.Select(child => (SitecoreItemId)child.ID);
    }

    return null;
}
/// <summary>
/// Lists a parent's children, as returned by <c>GetChildList</c>, wrapped as indexables.
/// </summary>
/// <param name="parent">The parent indexable.</param>
/// <returns>The child indexables, or <c>null</c> when the parent has no children.</returns>
protected override IEnumerable<SitecoreIndexableItem> GetIndexableChildren(SitecoreIndexableItem parent)
{
    ChildList children = this.GetChildList(parent.Item);
    if (children.Count != 0)
    {
        IEnumerable<Item> items = (IEnumerable<Item>)children;
        return items.Select(child => (SitecoreIndexableItem)child);
    }

    return null;
}
/// <summary>
/// Fetches the children of an item with Sitecore caches disabled.
/// </summary>
/// <param name="parent">The item whose children are requested.</param>
/// <returns>The child list reported by the item.</returns>
protected virtual ChildList GetChildList(Item parent)
{
    // NOTE(review): 5 is a raw ChildListOptions value, presumably a combination of
    // flags (IgnoreSecurity | SkipSorting?) - confirm against the enum definition.
    const ChildListOptions options = (ChildListOptions)5;

    using (new SitecoreCachesDisabler())
    {
        return parent.GetChildren(options);
    }
}
/// <summary>
/// Loads the latest version of an item in the language matching a culture, reading
/// from the root item's database with security and Sitecore caches disabled.
/// </summary>
/// <param name="indexableId">The item id; treated as a <c>SitecoreItemId</c>.</param>
/// <param name="culture">The culture whose name selects the language.</param>
/// <returns>The item wrapped as a <c>SitecoreIndexableItem</c>.</returns>
protected override SitecoreIndexableItem GetIndexable(IIndexableId indexableId, CultureInfo culture)
{
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        Language language = LanguageManager.GetLanguage(culture.Name, this.RootItem.Database);
        ID itemId = (ID)(indexableId as SitecoreItemId);

        // NOTE(review): (SecurityCheck)1 is a raw enum value, presumably
        // SecurityCheck.Disable - confirm against the enum definition.
        Item found = ItemManager.GetItem(itemId, language, Version.Latest, this.RootItem.Database, (SecurityCheck)1);
        return (SitecoreIndexableItem)found;
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment