.NET 4.x Ошибка в парсере html на второй итерации - C#

Узнай цену своей работы

Формулировка задачи:

Добрый день! Пытаюсь сделать парсер. Для начала делаю два POST запроса, далее GET запрос. После второго POST запроса я вібирают href с нужного div-a. Но при формировании из href нового адресса для GET запроса вілазит ошибка. Помогите пожалуйста.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
using System.Web.UI;
using System.Windows.Forms;
using HtmlAgilityPack;

namespace WindowsFormsApplicationParce
{
    public partial class Form1 : Form
    {
        UriBuilder uriBuilder = new UriBuilder();
        public Form1()
        {
            InitializeComponent();
        }
 
        private void button1_Click(object sender, EventArgs e)
        {
            // Create a request using a URL that can receive a post. 
            WebRequest request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/results_int");
            // Set the Method property of the request to POST.
            request.Method = "POST";
            // Create POST data and convert it to a byte array.
            string postData = "action=getSchoolsByRegion&region=10";
            byte[] byteArray = Encoding.UTF8.GetBytes(postData);
            // Set the ContentType property of the WebRequest.
            request.ContentType = "application/x-www-form-urlencoded";
            // Set the ContentLength property of the WebRequest.
            request.ContentLength = byteArray.Length;
            // Get the request stream.
            Stream dataStream = request.GetRequestStream();
            // Write the data to the request stream.
            dataStream.Write(byteArray, 0, byteArray.Length);
            // Close the Stream object.
            dataStream.Close();
            // Get the response.
            WebResponse response = request.GetResponse();
            // Display the status.
            Console.WriteLine(((HttpWebResponse) response).StatusDescription);
            // Get the stream containing content returned by the server.
            dataStream = response.GetResponseStream();
            // Open the stream using a StreamReader for easy access.
            StreamReader reader = new StreamReader(dataStream);
            // Read the content.
            string responseFromServer = reader.ReadToEnd();
            // Display the content.

            // Create a request using a URL that can receive a post. 
            request = WebRequest.Create("http://www.kangaroo.com.ua/index.php?r=conreq/resstudents");
            // Set the Method property of the request to POST.
            request.Method = "POST";
            // Create POST data and convert it to a byte array.
            postData = "schoolId=10245";
            byteArray = Encoding.UTF8.GetBytes(postData);
            // Set the ContentType property of the WebRequest.
            request.ContentType = "application/x-www-form-urlencoded";
            // Set the ContentLength property of the WebRequest.
            request.ContentLength = byteArray.Length;
            // Get the request stream.
            dataStream = request.GetRequestStream();
            // Write the data to the request stream.
            dataStream.Write(byteArray, 0, byteArray.Length);
            // Close the Stream object.
            dataStream.Close();
            // Get the response.
            response = request.GetResponse();
            // Display the status.
            Console.WriteLine(((HttpWebResponse) response).StatusDescription);
            // Get the stream containing content returned by the server.
            dataStream = response.GetResponseStream();
            // Open the stream using a StreamReader for easy access.
            reader = new StreamReader(dataStream);
            // Read the content.
            responseFromServer = reader.ReadToEnd();
            // Display the content.
            textBox1.Text = responseFromServer.ToString();
 
            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            string htmlString = responseFromServer;
 
            document.LoadHtml(responseFromServer);
            var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]");
            foreach (var obj in list)
            {
                var url = obj.SelectSingleNode(".").Attributes["href"].Value;
                //url = HttpUtility.HtmlDecode(url);
                //url = HttpUtility.UrlEncode(url);
                textBox1.Text += url + Environment.NewLine;
                Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);
                string r = HttpUtility.ParseQueryString(urii.Query).Get("r");
                string student = HttpUtility.ParseQueryString(urii.Query).Get("student");
                string schoolId = HttpUtility.ParseQueryString(urii.Query).Get("schoolId");
                string student_name = HttpUtility.ParseQueryString(urii.Query).Get("student_name");
                
                uriBuilder.Scheme = "http";
                uriBuilder.Host = "www.kangaroo.com.ua";
                uriBuilder.Path = "index.php";
                var query = HttpUtility.ParseQueryString(uriBuilder.Query);
                query["r"] = r;
                query["student"] = student;
                query["schoolId"] = schoolId;
                query["student_name"] = student_name;
                uriBuilder.Query = query.ToString();
                request =
               (HttpWebRequest)
                   WebRequest.Create(uriBuilder.Uri)as HttpWebRequest;
                request.Method = "GET";
 
                response = (HttpWebResponse)request.GetResponse();
                dataStream = response.GetResponseStream();
                reader = new StreamReader(dataStream);
                // Read the content.
                responseFromServer = reader.ReadToEnd();
                htmlString = responseFromServer;
                document.LoadHtml(htmlString);
                HtmlNode bodyNode = document.DocumentNode.SelectSingleNode("//div[@id='content']");
                if (String.IsNullOrEmpty(bodyNode.ToString()))
                {
                    bodyNode = bodyNode.SelectSingleNode("//input[@id='surname']");
                    textBox1.Text += bodyNode.Attributes["value"].Value;
                    bodyNode = bodyNode.SelectSingleNode("//input[@id='name']");
                    textBox1.Text += bodyNode.Attributes["value"].Value;
                    bodyNode = bodyNode.SelectSingleNode("//input[@id='patronymic']");
                    textBox1.Text += bodyNode.Attributes["value"].Value;
                    //количество балов
                    bodyNode = document.DocumentNode.SelectSingleNode("//div[@class='legend_total']");
                    textBox1.Text += bodyNode.SelectSingleNode("//b").InnerText;
                }
            }
     }
}

Решение задачи: «.NET 4.x Ошибка в парсере html на второй итерации»

textual
Листинг программы
var list = document.DocumentNode.SelectNodes("//div[@id='content']/a[@href]").Select(node => node.Attributes["href"].Value).ToArray();
foreach (string url in list)
{
    textBox1Text += url + Environment.NewLine;
    Uri urii = new Uri(@"http:\\kangaroo.com.ua"+ url);

ИИ поможет Вам:


  • решить любую задачу по программированию
  • объяснить код
  • расставить комментарии в коде
  • и т.д
Попробуйте бесплатно

Оцени полезность:

10   голосов , оценка 4 из 5