.NET 4.x Ошибка в парсере html на второй итерации - C#
Формулировка задачи:
Добрый день!
Пытаюсь сделать парсер. Для начала делаю два POST запроса, далее GET запрос. После второго POST запроса я вібирают href с нужного div-a. Но при формировании из href нового адресса для GET запроса вілазит ошибка. Помогите пожалуйста.
using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Diagnostics; using System.Drawing; using System.IO; using System.Linq; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Web; using System.Web.UI; using System.Windows.Forms; using HtmlAgilityPack; namespace WindowsFormsApplicationParce { public partial class Form1 : Form { UriBuilder uriBuilder = new UriBuilder(); public Form1() { InitializeComponent(); } private void button1_Click(object sender, EventArgs e) { // Create a request using a URL that can receive a post. WebRequest request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/results_int"); // Set the Method property of the request to POST. request.Method = "POST"; // Create POST data and convert it to a byte array. string postData = "action=getSchoolsByRegion®ion=10"; byte[] byteArray = Encoding.UTF8.GetBytes(postData); // Set the ContentType property of the WebRequest. request.ContentType = "application/x-www-form-urlencoded"; // Set the ContentLength property of the WebRequest. request.ContentLength = byteArray.Length; // Get the request stream. Stream dataStream = request.GetRequestStream(); // Write the data to the request stream. dataStream.Write(byteArray, 0, byteArray.Length); // Close the Stream object. dataStream.Close(); // Get the response. WebResponse response = request.GetResponse(); // Display the status. Console.WriteLine(((HttpWebResponse) response).StatusDescription); // Get the stream containing content returned by the server. dataStream = response.GetResponseStream(); // Open the stream using a StreamReader for easy access. StreamReader reader = new StreamReader(dataStream); // Read the content. string responseFromServer = reader.ReadToEnd(); // Display the content. // Create a request using a URL that can receive a post. request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/resstudents"); // Set the Method property of the request to POST. request.Method = "POST"; // Create POST data and convert it to a byte array. postData = "schoolId=10245"; byteArray = Encoding.UTF8.GetBytes(postData); // Set the ContentType property of the WebRequest. request.ContentType = "application/x-www-form-urlencoded"; // Set the ContentLength property of the WebRequest. request.ContentLength = byteArray.Length; // Get the request stream. dataStream = request.GetRequestStream(); // Write the data to the request stream. dataStream.Write(byteArray, 0, byteArray.Length); // Close the Stream object. dataStream.Close(); // Get the response. response = request.GetResponse(); // Display the status. Console.WriteLine(((HttpWebResponse) response).StatusDescription); // Get the stream containing content returned by the server. dataStream = response.GetResponseStream(); // Open the stream using a StreamReader for easy access. reader = new StreamReader(dataStream); // Read the content. responseFromServer = reader.ReadToEnd(); // Display the content. textBox1.Text = responseFromServer.ToString(); HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument(); string htmlString = responseFromServer; document.LoadHtml(responseFromServer); var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]"); foreach (var obj in list) { var url = obj.SelectSingleNode(".").Attributes["href"].Value; //url = HttpUtility.HtmlDecode(url); //url = HttpUtility.UrlEncode(url); textBox1.Text += url + Environment.NewLine; Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url); string r = HttpUtility.ParseQueryString(urii.Query).Get("r"); string student = HttpUtility.ParseQueryString(urii.Query).Get("student"); string schoolId = HttpUtility.ParseQueryString(urii.Query).Get("schoolId"); string student_name = HttpUtility.ParseQueryString(urii.Query).Get("student_name"); uriBuilder.Scheme = "http"; uriBuilder.Host = "www.kangaroo.com.ua"; uriBuilder.Path = "index.php"; var query = HttpUtility.ParseQueryString(uriBuilder.Query); query["r"] = r; query["student"] = student; query["schoolId"] = schoolId; query["student_name"] = student_name; uriBuilder.Query = query.ToString(); request = (HttpWebRequest) WebRequest.Create(uriBuilder.Uri)as HttpWebRequest; request.Method = "GET"; response = (HttpWebResponse)request.GetResponse(); dataStream = response.GetResponseStream(); reader = new StreamReader(dataStream); // Read the content. responseFromServer = reader.ReadToEnd(); htmlString = responseFromServer; document.LoadHtml(htmlString); HtmlNode bodyNode = document.DocumentNode.SelectSingleNode("//div[@id='content']"); if (String.IsNullOrEmpty(bodyNode.ToString())) { bodyNode = bodyNode.SelectSingleNode("//input[@id='surname']"); textBox1.Text += bodyNode.Attributes["value"].Value; bodyNode = bodyNode.SelectSingleNode("//input[@id='name']"); textBox1.Text += bodyNode.Attributes["value"].Value; bodyNode = bodyNode.SelectSingleNode("//input[@id='patronymic']"); textBox1.Text += bodyNode.Attributes["value"].Value; //количество балов bodyNode = document.DocumentNode.SelectSingleNode("//div[@class='legend_total']"); textBox1.Text += bodyNode.SelectSingleNode("//b").InnerText; } } } }
Решение задачи: «.NET 4.x Ошибка в парсере html на второй итерации»
textual
Листинг программы
var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]").Select(node => node.Attributes["href"].Value).ToArray(); foreach (string url in list) { textBox1Text += url + Environment.NewLine; Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);
ИИ поможет Вам:
- решить любую задачу по программированию
- объяснить код
- расставить комментарии в коде
- и т.д