Подскажите, как можно ускорить парсинг - C#
Формулировка задачи:
Сам код под спойлером.
{
    // Downloads every photo page in the ID range, extracts the image URLs
    // with a regular expression and appends them to textBox1.

    // The regex does not depend on the loop variable, so it is built once.
    // The dots in "vk.me" and ".jpg" are escaped so they match a literal dot
    // only, and the stray commas were removed from the character classes
    // (inside [...] a comma is an ordinary character, not a separator).
    Regex imageUrlRegex = new Regex(
        @"http://cs[a-z0-9]{2,}\.vk\.me/[a-z0-9/]{2,}\.jpg",
        RegexOptions.IgnoreCase);

    // Matches are collected here and written to the text box once at the end,
    // instead of rebuilding the text box's string for every single match.
    System.Text.StringBuilder found = new System.Text.StringBuilder();

    // WebClient is IDisposable; the using block releases it when done.
    using (WebClient client = new WebClient())
    {
        for (int i = 286543463; i <= 286543998; i++)
        {
            string text = client.DownloadString(
                "http://vk.com/fuck_humor?z=photo-12382740_" + i + "%2Falbum-12382740_00%2Frev");

            foreach (Match match in imageUrlRegex.Matches(text))
            {
                found.Append(match.Value);
            }
        }
    }

    // Same final content as the original per-match "+=" appends.
    textBox1.Text += found.ToString();
}
Решение задачи: «Подскажите, как можно ускорить парсинг»
textual
Листинг программы
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace ConsoleApplication10
{
    /// <summary>
    /// Console benchmark that times three ways of downloading a range of
    /// photo pages and extracting image URLs from them with a regex.
    /// </summary>
    class Program
    {
        /// <summary>First photo ID of the range (inclusive).</summary>
        private const int FirstPhotoId = 286543463;

        /// <summary>Last photo ID of the range (inclusive).</summary>
        private const int LastPhotoId = 286543998;

        /// <summary>Number of downloads started before Method1 waits for them.</summary>
        private const int BatchSize = 25;

        /// <summary>
        /// Shared image-URL matcher, built once instead of once per page.
        /// The dots in "vk.me" and ".jpg" are escaped so they match a literal
        /// dot only, and the stray commas were removed from the character
        /// classes (inside [...] a comma is an ordinary character).
        /// </summary>
        private static readonly Regex ImageUrlRegex = new Regex(
            @"http://cs[a-z0-9]{2,}\.vk\.me/[a-z0-9/]{2,}\.jpg",
            RegexOptions.IgnoreCase);

        /// <summary>Runs the three benchmarks and waits for a key press.</summary>
        static void Main(string[] args)
        {
            Method3();
            Method2();
            Method1();
            Console.ReadLine();
        }

        /// <summary>
        /// Downloads a single page once and runs the regex over that same
        /// text in parallel, once per ID in the range.
        /// </summary>
        public static void Method3()
        {
            var timer = Stopwatch.StartNew();

            string text;
            using (var client = new WebClient())
            {
                text = client.DownloadString("http://vk.com/fuck_humor?z=");
            }

            var found = new StringBuilder();
            var gate = new object();

            // NOTE(review): the loop index is not used, so every iteration
            // parses the same downloaded page. Kept as in the original;
            // confirm whether a per-ID download was intended here.
            // Parallel.For's upper bound is exclusive, so "+ 1" keeps the
            // iteration count equal to the inclusive loops of the other methods.
            Parallel.For(FirstPhotoId, LastPhotoId + 1, i =>
            {
                foreach (Match match in ImageUrlRegex.Matches(text))
                {
                    // StringBuilder instance members are not thread-safe,
                    // so appends from parallel iterations are serialized.
                    lock (gate)
                    {
                        found.Append(match.Value).Append(Environment.NewLine);
                    }
                }
            });

            timer.Stop();
            ReportElapsed("Method 3", timer);
        }

        /// <summary>
        /// Sequential baseline: downloads and parses the pages one by one
        /// with a single WebClient.
        /// </summary>
        public static void Method2()
        {
            var timer = Stopwatch.StartNew();
            var found = new StringBuilder();

            using (var client = new WebClient())
            {
                for (int id = FirstPhotoId; id <= LastPhotoId; id++)
                {
                    AppendMatches(client.DownloadString(BuildPhotoUrl(id)), found);
                }
            }

            timer.Stop();
            ReportElapsed("Method 2", timer);
        }

        /// <summary>
        /// Downloads the pages in batches of <see cref="BatchSize"/> concurrent
        /// tasks, then parses all downloaded pages on the calling thread.
        /// </summary>
        public static void Method1()
        {
            var timer = Stopwatch.StartNew();

            var urls = new List<string>(LastPhotoId - FirstPhotoId + 1);
            for (int id = FirstPhotoId; id <= LastPhotoId; id++)
            {
                urls.Add(BuildPhotoUrl(id));
            }

            // Written to by several tasks at once, so a concurrent dictionary
            // is required; a plain Dictionary is not safe for parallel writes.
            var pages = new ConcurrentDictionary<string, string>();
            var batch = new List<Task>(BatchSize);

            foreach (string url in urls)
            {
                // Copy to a local so each lambda captures its own URL.
                string currentUrl = url;
                batch.Add(Task.Factory.StartNew(() => DownloadInto(currentUrl, pages)));

                if (batch.Count == BatchSize)
                {
                    Task.WaitAll(batch.ToArray());
                    batch.Clear();
                }
            }

            // The range size is not a multiple of BatchSize; wait for the
            // final partial batch before reading the results.
            if (batch.Count > 0)
            {
                Task.WaitAll(batch.ToArray());
            }

            var found = new StringBuilder();
            foreach (var page in pages)
            {
                AppendMatches(page.Value, found);
            }

            timer.Stop();
            ReportElapsed("Method 1", timer);
        }

        /// <summary>Builds the page URL for the given photo ID.</summary>
        private static string BuildPhotoUrl(int photoId)
        {
            return string.Format(
                "http://vk.com/fuck_humor?z=photo-12382740_{0}%2Falbum-12382740_00%2Frev",
                photoId);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with its own WebClient and stores
        /// the page text in <paramref name="pages"/> under that URL.
        /// A failed download is reported on stderr and skipped, so one bad
        /// page does not abort the whole run.
        /// </summary>
        private static void DownloadInto(string url, ConcurrentDictionary<string, string> pages)
        {
            try
            {
                using (var client = new WebClient())
                {
                    pages[url] = client.DownloadString(url);
                }
            }
            catch (WebException ex)
            {
                Console.Error.WriteLine("Failed to download " + url + ": " + ex.Message);
            }
        }

        /// <summary>
        /// Appends every image URL found in <paramref name="html"/> to
        /// <paramref name="output"/>, one per line.
        /// </summary>
        private static void AppendMatches(string html, StringBuilder output)
        {
            foreach (Match match in ImageUrlRegex.Matches(html))
            {
                output.Append(match.Value).Append(Environment.NewLine);
            }
        }

        /// <summary>Prints elapsed milliseconds and ticks for one benchmark.</summary>
        private static void ReportElapsed(string label, Stopwatch timer)
        {
            Console.WriteLine(label + " : " + timer.ElapsedMilliseconds + " " + timer.ElapsedTicks);
        }
    }
}
ИИ поможет Вам:
- решить любую задачу по программированию
- объяснить код
- расставить комментарии в коде
- и т. д.