-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathProgram.cs
More file actions
66 lines (55 loc) · 2.37 KB
/
Program.cs
File metadata and controls
66 lines (55 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
using System;
using System.Linq;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using ScrapySharp.Extensions;
using ScrapySharp.Network;
using Newtonsoft.Json;
namespace webscraping_cli
{
/// <summary>
/// Console entry point. Walks the Digikala mobile-phone search listing page by page,
/// looks each listed product up on GSMArena by name, and hands the result to a
/// <c>gsm</c> instance (or reports the product as corrupted when the lookup fails).
/// </summary>
class Program
{
    // Extracts the first run of 4+ Latin letters, digits, hyphens or spaces from the
    // product name. Built once instead of once per product.
    private static readonly Regex NameRegex = new Regex(@"[a-zA-Z0-9- ]{4,}");

    /// <summary>
    /// Runs the scrape until either a product without a price is reached or a
    /// listing page contains no products.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        ScrapingBrowser browser = new ScrapingBrowser();
        gsm gsmData = new gsm();
        int pageNumber = 0;
        bool finish = false;

        while (!finish)
        {
            pageNumber++;
            WebPage mainPage = browser.NavigateToPage(new Uri($"https://www.digikala.com/search/category-mobile-phone/?pageno={pageNumber}&sortby=4"));
            HtmlNode[] phonesData = mainPage.Html
                .CssSelect("#content > div > div.o-page > div > div.o-page__content > div > article > div > ul > li ")
                .ToArray();

            // A page without any product entries means we ran past the end of the
            // listing (or the markup no longer matches); without this check the loop
            // would keep requesting higher page numbers forever.
            if (phonesData.Length == 0)
            {
                break;
            }

            foreach (var phoneData in phonesData)
            {
                // The product metadata is a JSON object stored in an attribute of the
                // first <div> inside the list item.
                string data = phoneData.CssSelect("div").ToArray()[0].GetAttributeValue("data-enhanced-ecommerce");
                Dictionary<string, string> dict = JsonConvert.DeserializeObject<Dictionary<string, string>>(data);
                string name = NameRegex.Match(dict["name"]).Value.Trim();

                // The search term is the name minus its last two characters
                // (NOTE(review): presumably to loosen the match - confirm). With two or
                // fewer characters the term would be empty or Substring would throw,
                // so report the product as corrupted instead of crashing the run.
                if (name.Length <= 2)
                {
                    gsmData.corruptedData(name, int.Parse(dict["id"]));
                    continue;
                }

                try
                {
                    // The lookup request is inside the try so that one failed request
                    // is reported for this product rather than aborting the whole scrape.
                    string searchTerm = name.Substring(0, name.Length - 2);
                    WebPage gsmPage = browser.NavigateToPage(new Uri($"https://www.gsmarena.com/res.php3?sSearch={searchTerm}"));

                    // Take the first search hit; indexing an empty result throws and is
                    // handled by the catch below.
                    string url = gsmPage.Html.CssSelect("#review-body > div > ul > li > a")
                        .ToArray()[0].GetAttributeValue("href");

                    // A product with a null price is the signal to stop scraping.
                    if (dict["price"] == null)
                    {
                        finish = true;
                        break;
                    }

                    gsmData.dataExtractor(url, name, int.Parse(dict["id"]), int.Parse(dict["price"]));
                }
                catch (Exception e)
                {
                    // Best effort: record the product as corrupted and keep going.
                    gsmData.corruptedData(name, int.Parse(dict["id"]));
                    Console.WriteLine(e.Message);
                }
            }
        }
    }
}
}