Last active
December 30, 2019 08:10
-
-
Save AmyrAhmady/9ee8dd0e8acf3f2963dd9fdbcbcc88f7 to your computer and use it in GitHub Desktop.
aaaaa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io/ioutil" | |
"github.com/anaskhan96/soup" | |
"strings" | |
"regexp" | |
"os" | |
) | |
func main() { | |
b, err := ioutil.ReadFile("file.html") | |
if err != nil { | |
fmt.Print(err) | |
} | |
str := string(b) | |
r := regexp.MustCompile(`https:[^\s"><]*\bdetail\/[0-9]*`) | |
matches := r.FindAllString(str, -1) | |
file, err := os.Create("links.json") | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
file.WriteString("{\n") | |
file.WriteString("\t" + `"ads": [` + "\n") | |
for index, each := range matches { | |
if (index % 2) == 1 { | |
fmt.Println(index -1) | |
file.WriteString("\t\t{\n") | |
resp, err := soup.Get(each) | |
if err != nil { | |
continue | |
} | |
doc := soup.HTMLParse(resp) | |
if doc.Error != nil { | |
continue | |
} | |
itemsTemp := doc.Find("div", "class", "detail_part3") | |
if itemsTemp.Error != nil { | |
fmt.Println(itemsTemp.Error) | |
continue | |
} | |
items := doc.Find("div", "class", "detail_part3").FindAll("li") | |
if len(items) < 1 { | |
continue | |
} | |
file.WriteString("\t\t\t" + `"link": ` + `"` + each + `"` + ", \n") | |
file.WriteString("\t\t\t" + `"title": ` + `"` + doc.Find("div", "class", "title").Find("h1").Text() + `"` + ", \n") | |
for itemIndex, item := range items { | |
hasUsefulSpan := false | |
if item.Find("span", "class", "name").Text() == "گروه آگهی :: " { | |
catsAndSubCats := item.Find("span", "class", "val").FindAll("a") | |
file.WriteString("\t\t\t" + `"category": ` + `"` + catsAndSubCats[0].Text() + `"` + ", \n") | |
if len(catsAndSubCats) > 1 { | |
file.WriteString("\t\t\t" + `"subcategory": ` + `"` + catsAndSubCats[1].Text() + `"`) | |
} | |
hasUsefulSpan = true | |
} else if item.Find("span", "class", "name").Text() == "نام و نام خانوادگی :: " { | |
file.WriteString("\t\t\t" + `"name": ` + `"` + item.Find("span", "class", "val").Text() + `"`) | |
hasUsefulSpan = true | |
} else if item.Find("span", "class", "name").Text() == "تلفن تماس :: " { | |
file.WriteString("\t\t\t" + `"phone": ` + `"` + item.Find("span", "class", "val").Text() + `"`) | |
hasUsefulSpan = true | |
} else if item.Find("span", "class", "name").Text() == "لینک :: " { | |
file.WriteString("\t\t\t" + `"website": ` + `"` + item.Find("span", "class", "val").Find("a").Text() + `"`) | |
hasUsefulSpan = true | |
} else if item.Find("span", "class", "name").Text() == "موقعیت :: " { | |
file.WriteString("\t\t\t" + `"location": ` + `"` + item.Find("span", "class", "val").Text() + `"`) | |
hasUsefulSpan = true | |
} else if item.Find("span", "class", "name").Text() == "نشانی :: " { | |
file.WriteString("\t\t\t" + `"address": ` + `"` + strings.Replace(item.Find("span", "class", "val").Text(), "\n", " ", -1) + `"`) | |
hasUsefulSpan = true | |
} | |
if (len(items) - 1) == itemIndex { | |
file.WriteString("\n") | |
} else { | |
if hasUsefulSpan == true { | |
file.WriteString(",\n") | |
} | |
} | |
} | |
if (len(matches) - 1) == index { | |
file.WriteString("\t\t}\n") | |
} else { | |
file.WriteString("\t\t},\n") | |
} | |
} | |
} | |
file.WriteString("\t]\n") | |
file.WriteString("}") | |
err = file.Close() | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment