yurukov · November 19, 2021 10:12 · postullat · Feb 4, 2020 · oboote · Nov 19, 2021
diff --git a/README b/README
 These are a few commands that can be used to scrape a full group page
 from Facebook. One could use the Graph API instead, but some users would
 be hidden there. The JS commands should be run in a browser's developer
 console; they scroll through the page, opening up hidden content and
 comments. I used Chrome. Once enough content is opened, save the page as
 you would any other and analyse its contents.
diff --git a/commands.js b/commands.js
 // 1. load the group

 // 2. start scrolling. This will erase all images to minimize the size
 // of the page in memory and keep scrolling down

 // Every 3 seconds: remove all <img> elements to keep the in-memory page
 // small, then jump to the bottom of the document to keep scrolling down.
 // `scroll` is intentionally left as a global so that the stop command
 // below can reach the timer id from a later console evaluation.
 scroll = setInterval(function() {
  // $$ is the DevTools console helper that returns matching elements as
  // an array. The loop variable is local so nothing leaks onto the
  // page's own global scope.
  for (const img of $$("img")) {
   img.remove();
  }
  window.scrollTo(0, document.body.scrollHeight);
 }, 3000);

 // 3. Stop scrolling when satisfied
 clearInterval(scroll);

 // 4. Add a guard against reloading the page
 // Installed on window.onbeforeunload: stops the `uncover` timer (if one
 // was started) and returns a message so the browser asks for
 // confirmation before leaving the page.
 window.onbeforeunload = function() {
  // This guard can fire before step 5 has ever been run. In that case
  // `uncover` does not exist, and referencing it directly would throw,
  // skipping the return below and letting the page unload unguarded.
  if (typeof uncover !== "undefined") {
   clearInterval(uncover);
  }
  return "Loading hidden comments stopped.";
 };

 // 5. Load hidden comments and posts. Loading some posts may reload the
 // page. In these cases the guard above will stop the loading process and
 // stop the reload. In that case, press cancel and run this command again
 // Once per second: remove images, then click at most one "see more" link
 // and at most one comment pager link that has not been clicked yet.
 // `uncover` is intentionally left as a global so that the guard and the
 // stop command can reach the timer id from other console evaluations.
 uncover = setInterval(function() {
  // $$ is the DevTools console helper that returns matching elements as
  // an array. All working variables are local so nothing leaks onto the
  // page's own global scope.
  for (const img of $$("img")) {
   img.remove();
  }

  // [class='...'] matches only when the class attribute is exactly that
  // string, so rewriting className to "... passed" after the click keeps
  // the same element from being picked again on the next tick.
  const moreLinks = $$("a[class='see_more_link']");
  if (moreLinks.length > 0) {
   const link = moreLinks[0];
   // Presumably so a link that navigates opens elsewhere instead of
   // replacing this page.
   link.target = "_blank";
   link.click();
   link.className = "see_more_link passed";
  }

  const pagerLinks = $$("a[class='UFIPagerLink']");
  if (pagerLinks.length > 0) {
   const pager = pagerLinks[0];
   pager.click();
   pager.className = "UFIPagerLink passed";
  }

  // Progress: how many unclicked links of each kind this tick found.
  console.log(moreLinks.length + " " + pagerLinks.length);
 }, 1000);

 // 6. When all is loaded, stop the comment/post uncover process
 clearInterval(uncover);

 // 7. Save the page code from the browser
	These are a few commands that can be used to scrape a full group page
	from Facebook. One could use the Graph API instead, but some users would
	be hidden there. The JS commands should be run in a browser's developer
	console; they scroll through the page, opening up hidden content and
	comments. I used Chrome. Once enough content is opened, save the page as
	you would any other and analyse its contents.
	// 1. load the group

	// 2. start scrolling. This will erase all images to minimize the size
	// of the page in memory and keep scrolling down

	// Every 3 seconds: remove all <img> elements to keep the in-memory page
	// small, then jump to the bottom of the document to keep scrolling down.
	// `scroll` is intentionally left as a global so that the stop command
	// below can reach the timer id from a later console evaluation.
	scroll = setInterval(function() {
		// $$ is the DevTools console helper that returns matching elements
		// as an array. The loop variable is local so nothing leaks onto the
		// page's own global scope.
		for (const img of $$("img")) {
			img.remove();
		}
		window.scrollTo(0, document.body.scrollHeight);
	}, 3000);

	// 3. Stop scrolling when satisfied
	clearInterval(scroll);

	// 4. Add a guard against reloading the page
	// Installed on window.onbeforeunload: stops the `uncover` timer (if one
	// was started) and returns a message so the browser asks for
	// confirmation before leaving the page.
	window.onbeforeunload = function() {
		// This guard can fire before step 5 has ever been run. In that case
		// `uncover` does not exist, and referencing it directly would throw,
		// skipping the return below and letting the page unload unguarded.
		if (typeof uncover !== "undefined") {
			clearInterval(uncover);
		}
		return "Loading hidden comments stopped.";
	};

	// 5. Load hidden comments and posts. Loading some posts may reload the
	// page. In these cases the guard above will stop the loading process and
	// stop the reload. In that case, press cancel and run this command again
	// Once per second: remove images, then click at most one "see more" link
	// and at most one comment pager link that has not been clicked yet.
	// `uncover` is intentionally left as a global so that the guard and the
	// stop command can reach the timer id from other console evaluations.
	uncover = setInterval(function() {
		// $$ is the DevTools console helper that returns matching elements
		// as an array. All working variables are local so nothing leaks onto
		// the page's own global scope.
		for (const img of $$("img")) {
			img.remove();
		}

		// [class='...'] matches only when the class attribute is exactly
		// that string, so rewriting className to "... passed" after the
		// click keeps the same element from being picked again next tick.
		const moreLinks = $$("a[class='see_more_link']");
		if (moreLinks.length > 0) {
			const link = moreLinks[0];
			// Presumably so a link that navigates opens elsewhere instead
			// of replacing this page.
			link.target = "_blank";
			link.click();
			link.className = "see_more_link passed";
		}

		const pagerLinks = $$("a[class='UFIPagerLink']");
		if (pagerLinks.length > 0) {
			const pager = pagerLinks[0];
			pager.click();
			pager.className = "UFIPagerLink passed";
		}

		// Progress: how many unclicked links of each kind this tick found.
		console.log(moreLinks.length + " " + pagerLinks.length);
	}, 1000);

	// 6. When all is loaded, stop the comment/post uncover process
	clearInterval(uncover);

	// 7. Save the page code from the browser