Подскажите, как можно ускорить скорость парсинга - C#
Формулировка задачи:
Сам код под спойлером.
{
// Scans a fixed range of photo pages and appends every image URL that matches
// the pattern to textBox1.
// The pattern and the Regex do not depend on the loop variable, so they are built once
// instead of on every iteration.
// NOTE(review): the dots in "vk.me" and ".jpg" are unescaped and the commas inside [...]
// are literal characters; the pattern is intentionally kept unchanged here.
string pattern = @"http://cs[a-z,0-9]{2,}\.vk.me/[a-z,0-9,/]{2,}.jpg";
Regex newReg = new Regex(pattern, RegexOptions.IgnoreCase);
// Matches are accumulated here and written to the text box in one assignment;
// "textBox1.Text +=" per match re-copies the whole text every time.
System.Text.StringBuilder found = new System.Text.StringBuilder();
// WebClient is IDisposable, so release it when the scan ends.
using (WebClient Client = new WebClient())
{
    try
    {
        for (int i = 286543463; i <= 286543998; i++)
        {
            string text = Client.DownloadString("http://vk.com/fuck_humor?z=photo-12382740_" + i + "%2Falbum-12382740_00%2Frev");
            foreach (Match mat in newReg.Matches(text))
            {
                found.Append(mat.Value);
            }
        }
    }
    finally
    {
        // Publish whatever was collected even if a download threw part-way through,
        // which is what the per-match "+=" version effectively did.
        textBox1.Text += found.ToString();
    }
}
}
Решение задачи: «Подскажите, как можно ускорить скорость парсинга»
textual
Листинг программы
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApplication10
{
class Program
{
/// <summary>
/// Entry point: runs the three timing variants in the order Method3, Method2, Method1
/// and then waits for a line of console input so the output stays visible.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Method3();
    Method2();
    Method1();

    // Block until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Downloads one page, then runs the image-URL regex over that page once per id in the
/// range using <c>Parallel.For</c>, and prints the elapsed milliseconds and ticks.
/// </summary>
/// <remarks>
/// NOTE(review): the loop index is never used, so every iteration parses the same
/// downloaded text; only one HTTP request is made. Confirm this is the intended benchmark.
/// </remarks>
public static void Method3()
{
    var timer = new Stopwatch();
    timer.Start();

    var sb = new StringBuilder();
    // Guards sb: StringBuilder is not safe for concurrent Append calls.
    var gate = new object();

    // NOTE(review): the dots in "vk.me" and ".jpg" are unescaped and the commas inside
    // [...] are literal characters; the pattern is intentionally kept unchanged here.
    string pattern = @"http://cs[a-z,0-9]{2,}\.vk.me/[a-z,0-9,/]{2,}.jpg";
    var newReg = new Regex(pattern, RegexOptions.IgnoreCase);

    string text;
    // WebClient is IDisposable; release it as soon as the page has been fetched.
    using (var client = new WebClient())
    {
        text = client.DownloadString("http://vk.com/fuck_humor?z=");
    }

    Parallel.For(
        286543463,
        // The upper bound of Parallel.For is exclusive; +1 keeps the same inclusive
        // range (286543463..286543998) that the sequential loops in this class use.
        286543998 + 1,
        // Each worker accumulates into its own builder, so no locking is needed per match.
        () => new StringBuilder(),
        (i, loopState, local) =>
        {
            foreach (Match mat in newReg.Matches(text))
            {
                local.AppendLine(mat.Value);
            }
            return local;
        },
        // Merge each worker's output into the shared builder under the lock.
        local =>
        {
            lock (gate)
            {
                sb.Append(local.ToString());
            }
        });

    timer.Stop();
    Console.WriteLine("Method 3 : " + timer.ElapsedMilliseconds + " " + timer.ElapsedTicks);
}
/// <summary>
/// Sequential baseline: downloads every photo page in the id range one after another
/// with a single <c>WebClient</c>, collects all regex matches, and prints the elapsed
/// milliseconds and ticks.
/// </summary>
public static void Method2()
{
    var timer = new Stopwatch();
    timer.Start();

    // StringBuilder instead of "string +=" so collecting matches stays linear.
    var textbox = new StringBuilder();

    // The pattern and Regex are loop-invariant, so they are created once.
    // NOTE(review): the dots in "vk.me" and ".jpg" are unescaped and the commas inside
    // [...] are literal characters; the pattern is intentionally kept unchanged here.
    string pattern = @"http://cs[a-z,0-9]{2,}\.vk.me/[a-z,0-9,/]{2,}.jpg";
    var newReg = new Regex(pattern, RegexOptions.IgnoreCase);

    // WebClient is IDisposable; dispose it when the loop is done.
    using (var client = new WebClient())
    {
        for (int i = 286543463; i <= 286543998; i++)
        {
            string text = client.DownloadString("http://vk.com/fuck_humor?z=photo-12382740_" + i + "%2Falbum-12382740_00%2Frev");
            foreach (Match mat in newReg.Matches(text))
            {
                textbox.Append(mat.Value);
            }
        }
    }

    timer.Stop();
    Console.WriteLine("Method 2 : " + timer.ElapsedMilliseconds + " " + timer.ElapsedTicks);
}
/// <summary>
/// Downloads every photo page in the id range using tasks started in batches of 25,
/// then runs the image-URL regex over the downloaded pages and prints the elapsed
/// milliseconds and ticks.
/// </summary>
/// <remarks>
/// Downloads are best-effort: a page whose request fails with a
/// <see cref="WebException"/> is skipped and does not appear in the results.
/// </remarks>
public static void Method1()
{
    // Number of downloads started before waiting for all of them to finish.
    const int batchSize = 25;

    var timer = new Stopwatch();
    timer.Start();

    // StringBuilder instead of "string +=" so collecting matches stays linear.
    var textbox = new StringBuilder();
    var listUrl = new List<string>();
    var listUrlResult = new Dictionary<string, string>();

    for (int i = 286543463; i <= 286543998; i++)
    {
        listUrl.Add(string.Format("http://vk.com/fuck_humor?z=photo-12382740_{0}%2Falbum-12382740_00%2Frev", i));
    }

    var batch = new List<Task>(batchSize);
    for (int index = 0; index < listUrl.Count; index++)
    {
        // Per-iteration copy so each task's closure captures its own URL.
        var curUrl = listUrl[index];

        batch.Add(Task.Factory.StartNew(() =>
        {
            string text;
            // One WebClient per task: a single instance is not shared between tasks.
            using (var client = new WebClient())
            {
                try
                {
                    text = client.DownloadString(curUrl);
                }
                catch (WebException)
                {
                    // Best-effort: skip pages that fail to download.
                    return;
                }
            }

            // Dictionary<TKey, TValue> is not safe for concurrent writers.
            lock (listUrlResult)
            {
                listUrlResult.Add(curUrl, text);
            }
        }));

        if (batch.Count == batchSize)
        {
            Task.WaitAll(batch.ToArray());
            batch.Clear();
        }
    }

    // The URL count (536) is not a multiple of the batch size, so the last partial
    // batch must be awaited too; otherwise the dictionary would be enumerated below
    // while downloads are still adding to it.
    if (batch.Count > 0)
    {
        Task.WaitAll(batch.ToArray());
    }

    // NOTE(review): the dots in "vk.me" and ".jpg" are unescaped and the commas inside
    // [...] are literal characters; the pattern is intentionally kept unchanged here.
    string pattern = @"http://cs[a-z,0-9]{2,}\.vk.me/[a-z,0-9,/]{2,}.jpg";
    var newReg = new Regex(pattern, RegexOptions.IgnoreCase);

    foreach (var value in listUrlResult)
    {
        foreach (Match mat in newReg.Matches(value.Value))
        {
            textbox.Append(mat.Value);
        }
    }

    timer.Stop();
    Console.WriteLine("Method 1 : " + timer.ElapsedMilliseconds + " " + timer.ElapsedTicks);
}
}
}