Skip to content

Instantly share code, notes, and snippets.

@MNF
Created December 2, 2017 06:57
Show Gist options
  • Save MNF/4cd1fd31d6f49e5fe49530b683985d14 to your computer and use it in GitHub Desktop.
Save MNF/4cd1fd31d6f49e5fe49530b683985d14 to your computer and use it in GitHub Desktop.
Import Apple Reviews
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Globalization;
using System.Linq;
using System.Linq.Expressions;
using System.Threading.Tasks;
using System.Xml.Linq;
using AutoMapper;
using Flurl;
using Flurl.Http;
using Flurl.Http.Xml;
using Microsoft.SDC.Common;
using Microsoft.SDC.HtmlAgilityPack;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace WebJobs.ITunesReviews
{
public class ImportAppleReviews
{
/// <summary>
/// Downloads the customer reviews of one application for one country.
/// Each raw XML response is passed to <c>_backupSentimentsToBlob.SaveToBlobStorage</c> before it is parsed.
/// </summary>
/// <param name="countryCode">Country code; inserted into the RSS url and used to pick the store front for WebObjects requests.</param>
/// <param name="applicationIdKvp">Application entry; only its <c>Value</c> (the application id) is read here.</param>
/// <param name="useRss">if true, only the latest reviews (the RSS feed sorted by most recent) are loaded;
/// otherwise WebObjects pages are requested one after another until a page yields no reviews.</param>
/// <returns>The reviews that could be parsed from the downloaded documents.</returns>
public List<AppReview> GetReviews(string countryCode, KeyValuePair<string, string> applicationIdKvp, bool useRss)
{
    string appId = applicationIdKvp.Value;

    if (useRss)
    {
        // The JSON flavour of the feed has no Updated field, hence the XML one.
        string feedUrl = $"https://itunes.apple.com/{countryCode}/rss/customerreviews/id={appId}/sortBy=mostRecent/xml";
        XDocument feed = GetXDocument(feedUrl);
        _backupSentimentsToBlob.SaveToBlobStorage($"AppleITunesReviews_{countryCode}_{appId}", "xml", feed.ToString());
        return XDocumentToAppReviews(feed);
    }

    // WebObjects/MZStore.woa approach, after https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
    List<AppReview> collected = new List<AppReview>();
    int pageNo = 0;
    while (pageNo < 999)
    {
        // Blocks on the asynchronous download; pages are fetched strictly one at a time.
        XDocument page = GetReviewsFromWebObjects(countryCode, appId, pageNo).Result;
        _backupSentimentsToBlob.SaveToBlobStorage($"AppleWebObjectsReviews_{countryCode}_{appId}_{pageNo}", "xml", page.ToString());

        IEnumerable<AppReview> pageReviews = WebObjectXDocumentToAppReviews(page);
        if (!pageReviews.Any())
        {
            // First page without reviews ends the paging.
            break;
        }

        collected.AddRange(pageReviews);
        pageNo++;
    }

    return collected;
}
/// <summary>
/// Extracts the reviews contained in one WebObjects response document.
/// </summary>
/// <param name="doc">The WebObjects response.</param>
/// <returns>One review per recognised container; containers that could not be converted are left out.</returns>
internal static IEnumerable<AppReview> WebObjectXDocumentToAppReviews(XDocument doc)
{
    // A review container is recognised purely by its layout attributes:
    // <VBoxView leftInset="10" rightInset="0" stretchiness="1" />
    // not sure how fragile it is / consider other checks
    List<AppReview> parsed = new List<AppReview>();
    foreach (XElement box in doc.DescendantsAnyNS("VBoxView"))
    {
        if (box.AttributeAnyNS("leftInset")?.Value != "10")
        {
            continue;
        }
        if (box.AttributeAnyNS("rightInset")?.Value != "0")
        {
            continue;
        }
        if (box.AttributeAnyNS("stretchiness")?.Value != "1")
        {
            continue;
        }

        AppReview review = WebObjectCreateAppReview(box);
        if (review != null)
        {
            parsed.Add(review);
        }
    }
    return parsed;
}
/// <summary>
/// Builds a review from one review container of a WebObjects page.
/// </summary>
/// <param name="vboxView">The VBoxView element that holds a single review.</param>
/// <returns>
/// The review, or null when the stripped title is null or when any extraction step throws
/// (the exception is logged together with the element's xml).
/// </returns>
private static AppReview WebObjectCreateAppReview(XElement vboxView)
{
    try
    {
        //example of xml in C:\GitRepos\AnalyticsScripts\SentimentAnalysis\WebJobs.ITunesReviews\MockedData\WebObjectsReviews.xml
        // The direct HBoxView children are looked up once: the first one carries title, rating and
        // review id, the second one carries author, version and date.
        var rows = vboxView.ElementsSameNS("HBoxView").ToList();
        var headerRow = rows.First();

        var titleText = headerRow.ElementSameNS("TextView").ElementSameNS("SetFontStyle").Value;
        titleText = HtmlAgilityPackHelper.StripHtmlTags(titleText);
        if (titleText == null)
        {
            return null;
        }

        // <HBoxView topInset="1" alt="5 stars">
        var ratingText = headerRow.ElementSameNS("HBoxView").ElementsSameNS("HBoxView").First().AttributeAnyNS("alt").Value;
        var rating = ratingText.ExtractAndParse<int>();

        // The review id is whatever follows "=" in the GotoURL's url attribute.
        var sourceCustomerResponseId = headerRow.ElementsSameNS("HBoxView").ElementsSameNS("HBoxView").ToList()[1].ElementsSameNS("VBoxView").First().ElementSameNS("GotoURL").AttributeAnyNS("url").Value.RightAfter("=");
        /*
        < SetFontStyle normalStyle = "textColor" >
        by
        < GotoURL target = "main" inhibitDragging = "false" url = "https://itunes.apple.com/WebObjects/MZStore.woa/wa/viewUsersUserReviews?userProfileId=427034608" >
        < b >
        Tay-lee-a
        </ b >
        </ GotoURL >
        -
        Version 4.2.3
        -
        01 August 2016
        </ SetFontStyle >
        */
        var urlAndAuthorElement = rows[1].ElementAnyNS("TextView").ElementAnyNS("SetFontStyle");
        var verAndDateText = urlAndAuthorElement.Value;
        // Only the last two '-' separated parts are used, so hyphens inside the author name are harmless.
        var arrayVerAndDate = verAndDateText.Split('-');
        var ver = arrayVerAndDate.SecondLast().Replace("Version", "", StringComparison.InvariantCultureIgnoreCase).Trim();
        var date = arrayVerAndDate.Last().Trim();

        // Dates look like "01 August 2016". Parsing them with the thread's current culture fails on
        // hosts whose culture does not know English month names, so the invariant culture is tried
        // first; the current culture remains as a fallback (and throws, as before, when it fails too).
        DateTime creationDate;
        if (!DateTime.TryParse(date, CultureInfo.InvariantCulture, DateTimeStyles.None, out creationDate))
        {
            creationDate = DateTime.Parse(date);
        }

        var goToUrlElement = urlAndAuthorElement.ElementAnyNS("GotoURL");
        var customerName = HtmlAgilityPackHelper.StripHtmlTags(goToUrlElement.Value).Trim();
        var contentText = vboxView.ElementAnyNS("TextView").ElementAnyNS("SetFontStyle").Value;
        return new AppReview(titleText, contentText)
        {
            SourceUniqueId = sourceCustomerResponseId,
            CreationDate = creationDate,
            Rating = rating,
            Version = ver,
            CustomerName = customerName
        };
    }
    catch (Exception exc)
    {
        // Best effort: a container that does not match the expected layout is logged and skipped.
        var msg = "An error has occured when processing XElement ";
        WebjobsLoggingHelper.LogError(msg, exc, vboxView.ToString());
        return null;
    }
}
/// <summary>
/// Requests one page of reviews from the iTunes WebObjects endpoint.
/// </summary>
/// <param name="countryCode">Country code; translated by <c>GetStoreId</c> into the value of the X-Apple-Store-Front header.</param>
/// <param name="appId">Application id, sent as the <c>id</c> query parameter.</param>
/// <param name="pageNo">Page number, sent as the <c>pageNumber</c> query parameter.</param>
/// <returns>The response as an XML document.</returns>
public static async Task<XDocument> GetReviewsFromWebObjects(string countryCode, string appId, int pageNo)
{
    //from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
    var userAgent = "iTunes/9.2 (Macintosh; U; Mac OS X 10.6)";
    // $country = "\nCOUNTRY: Australia";$store = 143460;
    var front = GetStoreId(countryCode); //"%d-1" % appStoreId NOT -1
    var url = $"http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZStore.woa/wa/viewContentsUserReviews?id={appId}&pageNumber={pageNo}&sortOrdering=4&onlyLatestVersion=false&type=Purple+Software";

    // A client is created for every page; dispose it once the document has been read so that
    // paging through many pages does not leave undisposed clients behind.
    using (var client = new FlurlClient(url))
    {
        client.WithHeader("X-Apple-Store-Front", front);
        client.WithHeader("User-Agent", userAgent);
        return await client.GetXDocumentAsync();
    }
}
/// <summary>
/// Downloads <paramref name="url"/> as an XML document, blocking until the download has completed,
/// and writes the url and the response to the debug output.
/// </summary>
/// <param name="url">Address to download.</param>
/// <returns>The downloaded document.</returns>
private static XDocument GetXDocument(string url)
{
    XDocument document = url.GetXDocumentAsync().Result;
    Debug.WriteLine($"From url {url} Response is {document}");
    return document;
}
/// <summary>
/// Converts a customer reviews feed document into reviews: every <c>entry</c> descendant in the
/// root element's namespace is converted, and entries that yield no review are dropped.
/// </summary>
/// <param name="doc">The feed document.</param>
/// <returns>The converted reviews; an empty list when the document has no root element.</returns>
/// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
public static List<AppReview> XDocumentToAppReviews(XDocument doc)
{
    if (doc == null)
    {
        throw new ArgumentNullException(nameof(doc));
    }

    XElement root = doc.Root;
    if (root == null)
    {
        // A document without a root element has no entries.
        return new List<AppReview>();
    }

    XNamespace ns = root.Name.Namespace;
    return doc.Descendants(ns + "entry")
        .Select(CreateAppReview)
        .Where(r => r != null)
        .ToList();
}
/// <summary>
/// Builds a review from one <c>entry</c> element of the customer reviews feed.
/// </summary>
/// <param name="e">The entry element.</param>
/// <returns>
/// The review, or null when the entry has no <c>content</c> element of type "text" or when its
/// <c>updated</c> value is missing or cannot be parsed as a date (the latter is logged).
/// </returns>
private static AppReview CreateAppReview(XElement e)
{
    var contentText = e.ElementsSameNS("content").FirstOrDefault(cont => cont.Attribute("type")?.Value == "text")?.Value;
    if (contentText == null)
    {
        return null;
    }

    DateTime creationDate;
    var updatedText = e.ElementSameNS("updated")?.Value;
    try
    {
        // Invariant culture first so the result does not depend on the host's regional settings;
        // the current culture stays as a fallback and throws when the value is missing or unparsable.
        if (!DateTime.TryParse(updatedText, CultureInfo.InvariantCulture, DateTimeStyles.None, out creationDate))
        {
            creationDate = DateTime.Parse(updatedText);
        }
    }
    catch (Exception exc) when (exc is FormatException || exc is ArgumentNullException)
    {
        // Same policy as WebObjectCreateAppReview: one bad entry is logged and skipped instead of
        // aborting the conversion of the whole feed.
        var msg = "An error has occurred when parsing the updated date of a feed entry ";
        WebjobsLoggingHelper.LogError(msg, exc, e.ToString());
        return null;
    }

    return new AppReview(e.ElementSameNS("title")?.Value, contentText)
    {
        SourceUniqueId = e.ElementSameNS("id")?.Value,
        CreationDate = creationDate,
        Rating = StructExtensions.TryParse(e.ElementAnyNS("rating")?.Value, 0),
        Version = e.ElementAnyNS("version")?.Value,
        CustomerName = e.ElementSameNS("author")?.ElementSameNS("name")?.Value
    };
}
/// <summary>
/// Maps a country code to the store id sent in the X-Apple-Store-Front header.
/// Matching ignores letter case, so "au" and "AU" give the same store.
/// </summary>
/// <param name="countryCode">Two-letter country code; the empty string selects the store noted as "USA?" below.</param>
/// <returns>The store id, or an empty string for null and for codes that are not listed.</returns>
private static string GetStoreId(string countryCode)
{
    //from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
    //TODO: ADD other countries if need
    switch (countryCode?.ToUpperInvariant())
    {
        case "":
            return "143441";//USA?
        case "AU":
            return "143460";
        case "NZ":
            return "143461";
        case "SG":
            return "143464";
        case "HK":
            return "143463";
        default:
            // null or a country that is not listed
            return "";
    }
}
}
}
@RikScheffer
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment