Thursday, October 31, 2019

Web Scraping

using HtmlAgilityPack;
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication9
{
/// <summary>
/// Console program that scrapes example phrases from a PhraseMix listing page
/// and appends their text to a local file.
/// </summary>
class Program
{
    /// <summary>Page scraped when the caller does not supply a URL.</summary>
    private const string DefaultUrl = "https://www.phrasemix.com/examples?page=1";

    /// <summary>XPath matching the heading node of each example on the page.</summary>
    private const string ExampleHeadingXPath = "//h3[@class='secondary-example-link quoted-title mt1']";

    /// <summary>File that scraped text is appended to.</summary>
    private const string OutputPath = @"p:/phrasemix.txt";

    /// <summary>
    /// Entry point. Blocks until a key is pressed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // NOTE(review): nothing here calls ProcessURLAsync, so running the program
        // as written scrapes nothing. Confirm the intended call before wiring it in.
        Console.ReadKey();
    }

    /// <summary>
    /// Loads a page, finds every example heading on it, and appends the heading
    /// text and the text of the first paragraph nested under that heading to the
    /// output file.
    /// </summary>
    /// <param name="url">
    /// Address of the page to scrape. When null or blank, <see cref="DefaultUrl"/>
    /// is used.
    /// </param>
    /// <param name="web">HtmlAgilityPack loader used to fetch the page.</param>
    /// <returns>Always 1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="web"/> is null.</exception>
    static async Task<int> ProcessURLAsync(string url, HtmlWeb web)
    {
        if (web == null)
        {
            throw new ArgumentNullException(nameof(web));
        }

        // The original ignored `url` and always fetched the hard-coded page;
        // honour the argument and keep the old page as the fallback.
        string target = string.IsNullOrWhiteSpace(url) ? DefaultUrl : url;

        HtmlDocument doc = web.Load(target);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection headings = doc.DocumentNode.SelectNodes(ExampleHeadingXPath);
        if (headings == null)
        {
            return 1;
        }

        foreach (HtmlNode row in headings)
        {
            await WriteTextAsync(HtmlEntity.DeEntitize(row.InnerText));

            // ".//p" is evaluated relative to `row`. A bare "//p" restarts at the
            // document root and would yield the same first <p> for every row.
            HtmlNode paragraph = row.SelectSingleNode(".//p");
            if (paragraph != null)
            {
                await WriteTextAsync(HtmlEntity.DeEntitize(paragraph.InnerText));
            }
        }

        return 1;
    }

    /// <summary>
    /// Appends <paramref name="text"/> to the output file, encoded as UTF-16
    /// (<see cref="Encoding.Unicode"/>). No separator or newline is added.
    /// </summary>
    /// <param name="text">Text to append.</param>
    /// <returns>A task that completes when the bytes have been written.</returns>
    static async Task WriteTextAsync(string text)
    {
        byte[] encodedText = Encoding.Unicode.GetBytes(text);

        // The file is opened exclusively (FileShare.None) in append mode for each call.
        using (FileStream sourceStream = new FileStream(
            OutputPath,
            FileMode.Append, FileAccess.Write, FileShare.None,
            bufferSize: 4096, useAsync: true))
        {
            await sourceStream.WriteAsync(encodedText, 0, encodedText.Length);
        }
    }
}
}
view raw Webscrapping.cs hosted with ❤ by GitHub

No comments :