Created
July 16, 2012 19:07
-
-
Save alexsandro-xpt/3124417 to your computer and use it in GitHub Desktop.
Crawler data modeling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Sample document sketching the crawler data model: one entry per
 * (WebSite_Id, History_Id) crawl, holding the pages fetched in that crawl.
 *
 * Ids and link hashes are written as decimal strings on purpose: the values
 * are larger than Number.MAX_SAFE_INTEGER (2^53 - 1), so as numeric literals
 * they would be silently rounded and would no longer identify the right record.
 */
var WebSiteHistory = [
    {
        /* Hash of the Href. Repeats a lot (once per WebSite_Id x History_Id). */
        Id: "1038984165156974140",
        /* Id of the crawled site. */
        WebSite_Id: 542,
        /* Id of the date-time at which the crawl happened. */
        History_Id: 6,
        Data: new Date(),
        Page: [
            {
                Id: "3125795879564125365",
                /* Repeats a lot (once per WebSite_Id x History_Id). */
                Href: "http://www.terra.com.br",
                ExternalLinks: 5,
                FragmentLinks: 3,
                InternalLinks: 35,
                NoHttpLinks: 0,
                ContentLength: 564,
                Status: 200,
                /* Many of these headers will repeat. Open question: is it worth storing only a reference instead? */
                Headers: [
                    { Nome: "content-type", Valor: "text/html" },
                    { Nome: "date", Valor: "Mon, 16 Jul 2012 19:11:24 GMT" },
                    { Nome: "server", Valor: "Apache/2.2.14 (Win32) DAV/2 mod_ssl/2.2.14 OpenSSL/0.9.8l mod_autoindex_color PHP/5.3.1" }
                ],
                /* NOTE(review): presumably the name of the file holding the stored response body - confirm. */
                Body: "dha976da9dh9sa7413.html",
                /* NOTE(review): "Deph" looks like a misspelling of "Depth"; the key is kept as-is so existing readers of this shape keep working. */
                Links: [
                    { Hash: "1038984165156974140", Anchor: "Clique aqui", Deph: 0, Rel: ["nofollow", "author"] },
                    { Hash: "684272168230605822", Anchor: "<b>Contato</b>", Deph: 0 }
                ]
            }
        ]
    }
];
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment