.NET 4.x Ошибка в парсере html на второй итерации - C#
Формулировка задачи:
Добрый день!
Пытаюсь сделать парсер. Для начала делаю два POST запроса, далее GET запрос. После второго POST запроса я вібирают href с нужного div-a. Но при формировании из href нового адресса для GET запроса вілазит ошибка. Помогите пожалуйста.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
using System.Web.UI;
using System.Windows.Forms;
using HtmlAgilityPack;
namespace WindowsFormsApplicationParce
{
public partial class Form1 : Form
{
UriBuilder uriBuilder = new UriBuilder();
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
// Create a request using a URL that can receive a post.
WebRequest request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/results_int");
// Set the Method property of the request to POST.
request.Method = "POST";
// Create POST data and convert it to a byte array.
string postData = "action=getSchoolsByRegion®ion=10";
byte[] byteArray = Encoding.UTF8.GetBytes(postData);
// Set the ContentType property of the WebRequest.
request.ContentType = "application/x-www-form-urlencoded";
// Set the ContentLength property of the WebRequest.
request.ContentLength = byteArray.Length;
// Get the request stream.
Stream dataStream = request.GetRequestStream();
// Write the data to the request stream.
dataStream.Write(byteArray, 0, byteArray.Length);
// Close the Stream object.
dataStream.Close();
// Get the response.
WebResponse response = request.GetResponse();
// Display the status.
Console.WriteLine(((HttpWebResponse) response).StatusDescription);
// Get the stream containing content returned by the server.
dataStream = response.GetResponseStream();
// Open the stream using a StreamReader for easy access.
StreamReader reader = new StreamReader(dataStream);
// Read the content.
string responseFromServer = reader.ReadToEnd();
// Display the content.
// Create a request using a URL that can receive a post.
request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/resstudents");
// Set the Method property of the request to POST.
request.Method = "POST";
// Create POST data and convert it to a byte array.
postData = "schoolId=10245";
byteArray = Encoding.UTF8.GetBytes(postData);
// Set the ContentType property of the WebRequest.
request.ContentType = "application/x-www-form-urlencoded";
// Set the ContentLength property of the WebRequest.
request.ContentLength = byteArray.Length;
// Get the request stream.
dataStream = request.GetRequestStream();
// Write the data to the request stream.
dataStream.Write(byteArray, 0, byteArray.Length);
// Close the Stream object.
dataStream.Close();
// Get the response.
response = request.GetResponse();
// Display the status.
Console.WriteLine(((HttpWebResponse) response).StatusDescription);
// Get the stream containing content returned by the server.
dataStream = response.GetResponseStream();
// Open the stream using a StreamReader for easy access.
reader = new StreamReader(dataStream);
// Read the content.
responseFromServer = reader.ReadToEnd();
// Display the content.
textBox1.Text = responseFromServer.ToString();
HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
string htmlString = responseFromServer;
document.LoadHtml(responseFromServer);
var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]");
foreach (var obj in list)
{
var url = obj.SelectSingleNode(".").Attributes["href"].Value;
//url = HttpUtility.HtmlDecode(url);
//url = HttpUtility.UrlEncode(url);
textBox1.Text += url + Environment.NewLine;
Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);
string r = HttpUtility.ParseQueryString(urii.Query).Get("r");
string student = HttpUtility.ParseQueryString(urii.Query).Get("student");
string schoolId = HttpUtility.ParseQueryString(urii.Query).Get("schoolId");
string student_name = HttpUtility.ParseQueryString(urii.Query).Get("student_name");
uriBuilder.Scheme = "http";
uriBuilder.Host = "www.kangaroo.com.ua";
uriBuilder.Path = "index.php";
var query = HttpUtility.ParseQueryString(uriBuilder.Query);
query["r"] = r;
query["student"] = student;
query["schoolId"] = schoolId;
query["student_name"] = student_name;
uriBuilder.Query = query.ToString();
request =
(HttpWebRequest)
WebRequest.Create(uriBuilder.Uri)as HttpWebRequest;
request.Method = "GET";
response = (HttpWebResponse)request.GetResponse();
dataStream = response.GetResponseStream();
reader = new StreamReader(dataStream);
// Read the content.
responseFromServer = reader.ReadToEnd();
htmlString = responseFromServer;
document.LoadHtml(htmlString);
HtmlNode bodyNode = document.DocumentNode.SelectSingleNode("//div[@id='content']");
if (String.IsNullOrEmpty(bodyNode.ToString()))
{
bodyNode = bodyNode.SelectSingleNode("//input[@id='surname']");
textBox1.Text += bodyNode.Attributes["value"].Value;
bodyNode = bodyNode.SelectSingleNode("//input[@id='name']");
textBox1.Text += bodyNode.Attributes["value"].Value;
bodyNode = bodyNode.SelectSingleNode("//input[@id='patronymic']");
textBox1.Text += bodyNode.Attributes["value"].Value;
//количество балов
bodyNode = document.DocumentNode.SelectSingleNode("//div[@class='legend_total']");
textBox1.Text += bodyNode.SelectSingleNode("//b").InnerText;
}
}
}
}Решение задачи: «.NET 4.x Ошибка в парсере html на второй итерации»
textual
Листинг программы
var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]").Select(node => node.Attributes["href"].Value).ToArray();
foreach (string url in list)
{
textBox1Text += url + Environment.NewLine;
Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);