.NET 4.x Ошибка в парсере html на второй итерации - C#
Формулировка задачи:
Добрый день!
Пытаюсь сделать парсер. Для начала делаю два POST запроса, далее GET запрос. После второго POST запроса я вібирают href с нужного div-a. Но при формировании из href нового адресса для GET запроса вілазит ошибка. Помогите пожалуйста.
Листинг программы
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Diagnostics;
- using System.Drawing;
- using System.IO;
- using System.Linq;
- using System.Net;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Threading.Tasks;
- using System.Web;
- using System.Web.UI;
- using System.Windows.Forms;
- using HtmlAgilityPack;
- namespace WindowsFormsApplicationParce
- {
- public partial class Form1 : Form
- {
- UriBuilder uriBuilder = new UriBuilder();
- public Form1()
- {
- InitializeComponent();
- }
- private void button1_Click(object sender, EventArgs e)
- {
- // Create a request using a URL that can receive a post.
- WebRequest request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/results_int");
- // Set the Method property of the request to POST.
- request.Method = "POST";
- // Create POST data and convert it to a byte array.
- string postData = "action=getSchoolsByRegion®ion=10";
- byte[] byteArray = Encoding.UTF8.GetBytes(postData);
- // Set the ContentType property of the WebRequest.
- request.ContentType = "application/x-www-form-urlencoded";
- // Set the ContentLength property of the WebRequest.
- request.ContentLength = byteArray.Length;
- // Get the request stream.
- Stream dataStream = request.GetRequestStream();
- // Write the data to the request stream.
- dataStream.Write(byteArray, 0, byteArray.Length);
- // Close the Stream object.
- dataStream.Close();
- // Get the response.
- WebResponse response = request.GetResponse();
- // Display the status.
- Console.WriteLine(((HttpWebResponse) response).StatusDescription);
- // Get the stream containing content returned by the server.
- dataStream = response.GetResponseStream();
- // Open the stream using a StreamReader for easy access.
- StreamReader reader = new StreamReader(dataStream);
- // Read the content.
- string responseFromServer = reader.ReadToEnd();
- // Display the content.
- // Create a request using a URL that can receive a post.
- request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/resstudents");
- // Set the Method property of the request to POST.
- request.Method = "POST";
- // Create POST data and convert it to a byte array.
- postData = "schoolId=10245";
- byteArray = Encoding.UTF8.GetBytes(postData);
- // Set the ContentType property of the WebRequest.
- request.ContentType = "application/x-www-form-urlencoded";
- // Set the ContentLength property of the WebRequest.
- request.ContentLength = byteArray.Length;
- // Get the request stream.
- dataStream = request.GetRequestStream();
- // Write the data to the request stream.
- dataStream.Write(byteArray, 0, byteArray.Length);
- // Close the Stream object.
- dataStream.Close();
- // Get the response.
- response = request.GetResponse();
- // Display the status.
- Console.WriteLine(((HttpWebResponse) response).StatusDescription);
- // Get the stream containing content returned by the server.
- dataStream = response.GetResponseStream();
- // Open the stream using a StreamReader for easy access.
- reader = new StreamReader(dataStream);
- // Read the content.
- responseFromServer = reader.ReadToEnd();
- // Display the content.
- textBox1.Text = responseFromServer.ToString();
- HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
- string htmlString = responseFromServer;
- document.LoadHtml(responseFromServer);
- var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]");
- foreach (var obj in list)
- {
- var url = obj.SelectSingleNode(".").Attributes["href"].Value;
- //url = HttpUtility.HtmlDecode(url);
- //url = HttpUtility.UrlEncode(url);
- textBox1.Text += url + Environment.NewLine;
- Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);
- string r = HttpUtility.ParseQueryString(urii.Query).Get("r");
- string student = HttpUtility.ParseQueryString(urii.Query).Get("student");
- string schoolId = HttpUtility.ParseQueryString(urii.Query).Get("schoolId");
- string student_name = HttpUtility.ParseQueryString(urii.Query).Get("student_name");
- uriBuilder.Scheme = "http";
- uriBuilder.Host = "www.kangaroo.com.ua";
- uriBuilder.Path = "index.php";
- var query = HttpUtility.ParseQueryString(uriBuilder.Query);
- query["r"] = r;
- query["student"] = student;
- query["schoolId"] = schoolId;
- query["student_name"] = student_name;
- uriBuilder.Query = query.ToString();
- request =
- (HttpWebRequest)
- WebRequest.Create(uriBuilder.Uri)as HttpWebRequest;
- request.Method = "GET";
- response = (HttpWebResponse)request.GetResponse();
- dataStream = response.GetResponseStream();
- reader = new StreamReader(dataStream);
- // Read the content.
- responseFromServer = reader.ReadToEnd();
- htmlString = responseFromServer;
- document.LoadHtml(htmlString);
- HtmlNode bodyNode = document.DocumentNode.SelectSingleNode("//div[@id='content']");
- if (String.IsNullOrEmpty(bodyNode.ToString()))
- {
- bodyNode = bodyNode.SelectSingleNode("//input[@id='surname']");
- textBox1.Text += bodyNode.Attributes["value"].Value;
- bodyNode = bodyNode.SelectSingleNode("//input[@id='name']");
- textBox1.Text += bodyNode.Attributes["value"].Value;
- bodyNode = bodyNode.SelectSingleNode("//input[@id='patronymic']");
- textBox1.Text += bodyNode.Attributes["value"].Value;
- //количество балов
- bodyNode = document.DocumentNode.SelectSingleNode("//div[@class='legend_total']");
- textBox1.Text += bodyNode.SelectSingleNode("//b").InnerText;
- }
- }
- }
- }
Решение задачи: «.NET 4.x Ошибка в парсере html на второй итерации»
textual
Листинг программы
- var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]").Select(node => node.Attributes["href"].Value).ToArray();
- foreach (string url in list)
- {
- textBox1Text += url + Environment.NewLine;
- Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);
ИИ поможет Вам:
- решить любую задачу по программированию
- объяснить код
- расставить комментарии в коде
- и т.д