Created
December 2, 2017 06:57
-
-
Save MNF/4cd1fd31d6f49e5fe49530b683985d14 to your computer and use it in GitHub Desktop.
Import Apple Reviews
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Configuration; | |
using System.Data; | |
using System.Diagnostics; | |
using System.Diagnostics.Contracts; | |
using System.Globalization; | |
using System.Linq; | |
using System.Linq.Expressions; | |
using System.Threading.Tasks; | |
using System.Xml.Linq; | |
using AutoMapper; | |
using Flurl; | |
using Flurl.Http; | |
using Flurl.Http.Xml; | |
using Microsoft.SDC.Common; | |
using Microsoft.SDC.HtmlAgilityPack; | |
using Newtonsoft.Json; | |
using Newtonsoft.Json.Linq; | |
namespace WebJobs.ITunesReviews | |
{ | |
public class ImportAppleReviews | |
{ | |
/// <summary>
/// Loads App Store customer reviews for one application in one country,
/// either from the RSS customer-reviews feed or by paging through the WebObjects endpoint.
/// Every downloaded document is also handed to the blob backup before it is parsed.
/// </summary>
/// <param name="countryCode">Country code; inserted into the RSS URL and forwarded to the WebObjects request.</param>
/// <param name="applicationIdKvp">Application entry; only its <c>Value</c> is used here, as the application id.</param>
/// <param name="useRss">If true, only the latest reviews are loaded (single RSS request); otherwise WebObjects pages are walked.</param>
/// <returns>The reviews extracted from the downloaded document(s).</returns>
public List<AppReview> GetReviews(string countryCode, KeyValuePair<string, string> applicationIdKvp, bool useRss)
{
    string appId = applicationIdKvp.Value;

    if (useRss)
    {
        // The JSON feed doesn't have an Updated field, so the XML feed is used.
        string feedUrl = $"https://itunes.apple.com/{countryCode}/rss/customerreviews/id={appId}/sortBy=mostRecent/xml";
        XDocument feed = GetXDocument(feedUrl);
        _backupSentimentsToBlob.SaveToBlobStorage($"AppleITunesReviews_{countryCode}_{appId}", "xml", feed.ToString());
        return XDocumentToAppReviews(feed);
    }

    // WebObjects/MZStore.woa approach, from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
    var collected = new List<AppReview>();
    int page = 0;
    while (page < 999)
    {
        XDocument pageDoc = GetReviewsFromWebObjects(countryCode, appId, page).Result;
        _backupSentimentsToBlob.SaveToBlobStorage($"AppleWebObjectsReviews_{countryCode}_{appId}_{page}", "xml", pageDoc.ToString());

        IEnumerable<AppReview> pageReviews = WebObjectXDocumentToAppReviews(pageDoc);
        if (!pageReviews.Any())
        {
            // A page that yields no reviews ends the walk.
            break;
        }

        collected.AddRange(pageReviews);
        page++;
    }

    return collected;
}
/// <summary>
/// Extracts reviews from a WebObjects response document.
/// </summary>
/// <param name="doc">Document returned by the WebObjects endpoint.</param>
/// <returns>One review per matching element; elements that could not be converted are left out.</returns>
internal static IEnumerable<AppReview> WebObjectXDocumentToAppReviews(XDocument doc)
{
    // A review container is recognised purely by this attribute combination:
    //   <VBoxView leftInset="10" rightInset="0" stretchiness="1" />
    // Not sure how fragile that is; other checks may be worth considering.
    var parsed = new List<AppReview>();
    foreach (var box in doc.DescendantsAnyNS("VBoxView"))
    {
        if (box.AttributeAnyNS("leftInset")?.Value != "10")
        {
            continue;
        }
        if (box.AttributeAnyNS("rightInset")?.Value != "0")
        {
            continue;
        }
        if (box.AttributeAnyNS("stretchiness")?.Value != "1")
        {
            continue;
        }

        var review = WebObjectCreateAppReview(box);
        if (review != null)
        {
            parsed.Add(review);
        }
    }

    return parsed;
}
/// <summary>
/// Builds an <c>AppReview</c> from one review <c>VBoxView</c> element of a WebObjects response.
/// </summary>
/// <param name="vboxView">The review container element.</param>
/// <returns>
/// The review, or null when the stripped title is null or when any extraction step throws
/// (the exception is logged together with the element's XML).
/// </returns>
private static AppReview WebObjectCreateAppReview(XElement vboxView)
{
    try
    {
        // Example of the xml: C:\GitRepos\AnalyticsScripts\SentimentAnalysis\WebJobs.ITunesReviews\MockedData\WebObjectsReviews.xml
        // The first HBoxView child carries title, rating and review id; the second carries author, version and date.
        var rows = vboxView.ElementsSameNS("HBoxView").ToList();
        var headerRow = rows.First();

        var titleText = headerRow.ElementSameNS("TextView").ElementSameNS("SetFontStyle").Value;
        titleText = HtmlAgilityPackHelper.StripHtmlTags(titleText);
        if (titleText == null)
        {
            return null;
        }

        // <HBoxView topInset="1" alt="5 stars">
        var ratingText = headerRow.ElementSameNS("HBoxView").ElementsSameNS("HBoxView").First().AttributeAnyNS("alt").Value;
        var rating = ratingText.ExtractAndParse<int>();

        // The review id is whatever follows '=' in the GotoURL's url attribute.
        var sourceCustomerResponseId = headerRow.ElementsSameNS("HBoxView").ElementsSameNS("HBoxView").ToList()[1]
            .ElementsSameNS("VBoxView").First()
            .ElementSameNS("GotoURL").AttributeAnyNS("url").Value
            .RightAfter("=");

        /*
          Shape of the author/version/date element:
            <SetFontStyle normalStyle="textColor">
              by
              <GotoURL target="main" inhibitDragging="false" url="https://itunes.apple.com/WebObjects/MZStore.woa/wa/viewUsersUserReviews?userProfileId=427034608">
                <b>Tay-lee-a</b>
              </GotoURL>
              -
              Version 4.2.3
              -
              01 August 2016
            </SetFontStyle>
        */
        var urlAndAuthorElement = rows[1].ElementAnyNS("TextView").ElementAnyNS("SetFontStyle");
        var arrayVerAndDate = urlAndAuthorElement.Value.Split('-');

        // Only the last two '-' separated parts are used, so hyphens in the author name do not matter.
        var ver = arrayVerAndDate.SecondLast().Replace("Version", "", StringComparison.InvariantCultureIgnoreCase).Trim();
        var date = arrayVerAndDate.Last().Trim();

        // Parse with the invariant culture: the text uses English month names ("01 August 2016"),
        // so the result must not depend on the culture of the machine running the job.
        var creationDate = DateTime.Parse(date, CultureInfo.InvariantCulture);

        var goToUrlElement = urlAndAuthorElement.ElementAnyNS("GotoURL");
        var customerName = HtmlAgilityPackHelper.StripHtmlTags(goToUrlElement.Value).Trim();

        var contentText = vboxView.ElementAnyNS("TextView").ElementAnyNS("SetFontStyle").Value;

        return new AppReview(titleText, contentText)
        {
            SourceUniqueId = sourceCustomerResponseId,
            CreationDate = creationDate,
            Rating = rating,
            Version = ver,
            CustomerName = customerName
        };
    }
    catch (Exception exc)
    {
        // Best effort: one malformed element must not abort the whole page, so log it and skip.
        var msg = "An error has occured when processing XElement ";
        WebjobsLoggingHelper.LogError(msg, exc, vboxView.ToString());
        return null;
    }
}
/// <summary>
/// Downloads one page of reviews from the WebObjects (MZStore.woa) endpoint as XML.
/// Approach taken from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
/// </summary>
/// <param name="countryCode">Country code used to look up the value sent as the X-Apple-Store-Front header.</param>
/// <param name="appId">Application id placed in the <c>id</c> query parameter.</param>
/// <param name="pageNo">Page number placed in the <c>pageNumber</c> query parameter.</param>
/// <returns>The response parsed as an <see cref="XDocument"/>.</returns>
public static async Task<XDocument> GetReviewsFromWebObjects(string countryCode, string appId, int pageNo)
{
    var userAgent = "iTunes/9.2 (Macintosh; U; Mac OS X 10.6)";

    // e.g. Australia => store 143460.
    // The Python original sends "%d-1" % appStoreId; the "-1" suffix is deliberately NOT used here.
    var front = GetStoreId(countryCode);

    var url = String.Format(
        "http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZStore.woa/wa/viewContentsUserReviews?id={0}&pageNumber={1}&sortOrdering=4&onlyLatestVersion=false&type=Purple+Software",
        appId, pageNo);

    // FlurlClient is IDisposable; dispose it once the response has been read so that
    // paging through many pages does not leave one undisposed client per request.
    using (var client = new FlurlClient(url))
    {
        client.WithHeader("X-Apple-Store-Front", front);
        client.WithHeader("User-Agent", userAgent);
        return await client.GetXDocumentAsync();
    }
}
/// <summary>
/// Fetches <paramref name="url"/> as an XML document, blocking until the request completes,
/// and writes the url and the response to the debug output.
/// </summary>
/// <param name="url">Address to download.</param>
/// <returns>The downloaded document.</returns>
private static XDocument GetXDocument(string url)
{
    XDocument fetched = url.GetXDocumentAsync().Result;
    Debug.WriteLine($"From url {url} Response is {fetched}");
    return fetched;
}
/// <summary>
/// Converts the <c>entry</c> elements of an RSS customer-reviews document into reviews.
/// </summary>
/// <param name="doc">Feed document; entries are looked up in the namespace of its root element.</param>
/// <returns>One review per entry that could be converted; entries that convert to null are left out.</returns>
public static List<AppReview> XDocumentToAppReviews(XDocument doc)
{
    XName entryName = doc.Root.Name.Namespace + "entry";

    var parsed = new List<AppReview>();
    foreach (XElement entry in doc.Descendants(entryName))
    {
        AppReview review = CreateAppReview(entry);
        if (review != null)
        {
            parsed.Add(review);
        }
    }

    return parsed;
}
/// <summary>
/// Builds an <c>AppReview</c> from one <c>entry</c> element of the RSS customer-reviews feed.
/// </summary>
/// <param name="e">The feed entry.</param>
/// <returns>
/// The review, or null when the entry has no <c>content</c> element with <c>type="text"</c>
/// or when its <c>updated</c> element is missing or not a parseable date.
/// </returns>
private static AppReview CreateAppReview(XElement e)
{
    var contentText = e.ElementsSameNS("content")
        .FirstOrDefault(cont => cont.Attribute("type")?.Value == "text")
        ?.Value;
    if (contentText == null)
    {
        return null;
    }

    // A missing <updated> used to reach DateTime.Parse(null) and throw, aborting the whole feed.
    // Skip such an entry instead, the same way an entry without text content is skipped.
    // Invariant culture keeps the parse independent of the machine's regional settings.
    DateTime creationDate;
    if (!DateTime.TryParse(e.ElementSameNS("updated")?.Value, CultureInfo.InvariantCulture, DateTimeStyles.None, out creationDate))
    {
        return null;
    }

    return new AppReview(e.ElementSameNS("title")?.Value, contentText)
    {
        SourceUniqueId = e.ElementSameNS("id")?.Value,
        CreationDate = creationDate,
        // Falls back to 0 when the rating cannot be parsed.
        Rating = StructExtensions.TryParse(e.ElementAnyNS("rating")?.Value, 0),
        Version = e.ElementAnyNS("version")?.Value,
        CustomerName = e.ElementSameNS("author")?.ElementSameNS("name")?.Value
    };
}
/// <summary>
/// Maps a country code to the store id sent to the WebObjects endpoint.
/// Values taken from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
/// </summary>
/// <param name="countryCode">Country code; matched exactly (case-sensitive).</param>
/// <returns>The store id, or an empty string when the code is not listed.</returns>
private static string GetStoreId(string countryCode)
{
    // TODO: add other countries if needed.
    switch (countryCode)
    {
        case "":
            return "143441"; // USA?
        case "AU":
            return "143460";
        case "NZ":
            return "143461";
        case "SG":
            return "143464";
        case "HK":
            return "143463";
        default:
            return "";
    }
}
} | |
} |
A full overview of storefront IDs can be found here:
https://affiliate.itunes.apple.com/resources/documentation/linking-to-the-itunes-music-store/#appendix
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The class shows how to read Apple reviews using two methods: the latest reviews via RSS, and historical reviews via WebObjects.
The class is extracted from a proprietary application and may have missing references (and as such will not compile).
Also, the application was created in 2016 and has not been actively maintained since then, so the branch using WebObjects may be broken.