From 7997b7c589b084d043864489224b02d506cf0ba0 Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Mon, 6 Jan 2025 23:04:49 +0800 Subject: [PATCH 1/8] complete getting notice info --- constants/constants.go | 26 +++++++------- jwch_test.go | 7 ++++ model.go | 6 ++++ notice.go | 82 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 12 deletions(-) create mode 100644 notice.go diff --git a/constants/constants.go b/constants/constants.go index 45f1c57..0359407 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -17,18 +17,20 @@ limitations under the License. package constants const ( - ClassroomQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/kkgl/kbcx/kbcx_kjs.aspx" - CourseURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xkjg/wdxk/xkjg_list.aspx" - MarksQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xyzk/cjyl/score_sheet.aspx" - CETQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/glbm/cet/cet_cszt.aspx" - JSQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/glbm/computer/jsj_cszt.aspx" - UserInfoURL = "https://jwcjwxt2.fzu.edu.cn:81/jcxx/xsxx/StudentInformation.aspx" - SSOLoginURL = "https://jwcjwxt2.fzu.edu.cn/Sfrz/SSOLogin" - SchoolCalendarURL = "https://jwcjwxt2.fzu.edu.cn:82/xl.asp" - CreditQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xyzk/xftj/CreditStatistics.aspx" - GPAQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xyzk/jdpm/GPA_sheet.aspx" - VerifyCodeURL = "https://jwcjwxt1.fzu.edu.cn/plus/verifycode.asp" - ExamRoomQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xkjg/examination/exam_list.aspx" + ClassroomQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/kkgl/kbcx/kbcx_kjs.aspx" + CourseURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xkjg/wdxk/xkjg_list.aspx" + MarksQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xyzk/cjyl/score_sheet.aspx" + CETQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/glbm/cet/cet_cszt.aspx" + JSQueryURL = 
"https://jwcjwxt2.fzu.edu.cn:81/student/glbm/computer/jsj_cszt.aspx" + UserInfoURL = "https://jwcjwxt2.fzu.edu.cn:81/jcxx/xsxx/StudentInformation.aspx" + SSOLoginURL = "https://jwcjwxt2.fzu.edu.cn/Sfrz/SSOLogin" + SchoolCalendarURL = "https://jwcjwxt2.fzu.edu.cn:82/xl.asp" + CreditQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xyzk/xftj/CreditStatistics.aspx" + GPAQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xyzk/jdpm/GPA_sheet.aspx" + VerifyCodeURL = "https://jwcjwxt1.fzu.edu.cn/plus/verifycode.asp" + ExamRoomQueryURL = "https://jwcjwxt2.fzu.edu.cn:81/student/xkjg/examination/exam_list.aspx" + NoticeInfoQueryURL = "https://jwch.fzu.edu.cn/jxtz.htm" + JwchNoticeURLPrefix = "https://jwch.fzu.edu.cn/" JwchPrefix = "https://jwcjwxt2.fzu.edu.cn:81" JwchReferer = "https://jwcjwxt1.fzu.edu.cn/" diff --git a/jwch_test.go b/jwch_test.go index c73872a..11fe259 100644 --- a/jwch_test.go +++ b/jwch_test.go @@ -277,3 +277,10 @@ func TestGetExamRoomInfo(t *testing.T) { t.Error(err) } } + +func TestGetNoticesInfo(t *testing.T) { + _, err := stu.GetNoticeInfo() + if err != nil { + t.Error(err) + } +} diff --git a/model.go b/model.go index a75a60c..8da6832 100644 --- a/model.go +++ b/model.go @@ -187,3 +187,9 @@ type ExamRoomInfo struct { Time string // 考试时间 Location string // 考试地点 } + +type NoticeInfo struct { + Title string // 通知标题 + URL string // 通知链接 + Date string // 通知日期 +} diff --git a/notice.go b/notice.go new file mode 100644 index 0000000..2993d29 --- /dev/null +++ b/notice.go @@ -0,0 +1,82 @@ +/* +Copyright 2024 The west2-online Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package jwch + +import ( + "fmt" + "strings" + + "github.com/antchfx/htmlquery" + "golang.org/x/net/html" + + "github.com/west2-online/jwch/constants" +) + +func (s *Student) GetNoticeInfo() (list []*NoticeInfo, err error) { + // 获取通知公告 + // 1. 获取通知公告页面 + res, err := s.PostWithIdentifier(constants.NoticeInfoQueryURL, map[string]string{}) + if err != nil { + return nil, err + } + // 2. 解析页面 + list, err = parseNoticeInfo(res) + if err != nil { + return nil, err + } + // 3. 返回结果 + return list, nil +} + +func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) { + // 解析通知公告页面 + var list []*NoticeInfo + + // 修正 XPath 表达式 + sel := htmlquery.FindOne(doc, "//div[@class='box-gl clearfix']") + if sel == nil { + return nil, fmt.Errorf("cannot find the notice list") + } + + // 查找所有的
<li> 元素 + rows := htmlquery.Find(sel, ".//ul[@class='list-gl']/li") + + for _, row := range rows { + // 提取日期 + dateNode := htmlquery.FindOne(row, ".//span[@class='doclist_time']") + date := strings.TrimSpace(htmlquery.InnerText(dateNode)) + + // 提取标题 + titleNode := htmlquery.FindOne(row, ".//a") + + title := strings.TrimSpace(htmlquery.SelectAttr(titleNode, "title")) + + // 提取 URL + url := strings.TrimSpace(htmlquery.SelectAttr(titleNode, "href")) + url = constants.JwchNoticeURLPrefix + url + + noticeInfo := &NoticeInfo{ + Title: title, + URL: url, + Date: date, + } + + list = append(list, noticeInfo) + } + + return list, nil +} From 3dbfb0e81a84b6d0554123eaa5e254928d14c076 Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Tue, 7 Jan 2025 01:37:37 +0800 Subject: [PATCH 2/8] add NoticeInfoReq to get noticeInfo --- jwch_test.go | 2 +- model.go | 4 ++++ notice.go | 39 +++++++++++++++++++++++++++++++-------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/jwch_test.go b/jwch_test.go index 11fe259..2342a06 100644 --- a/jwch_test.go +++ b/jwch_test.go @@ -279,7 +279,7 @@ func TestGetExamRoomInfo(t *testing.T) { } func TestGetNoticesInfo(t *testing.T) { - _, err := stu.GetNoticeInfo() + _, err := stu.GetNoticeInfo(&NoticeInfoReq{PageNum: 2}) if err != nil { t.Error(err) } diff --git a/model.go b/model.go index 8da6832..5c6ee29 100644 --- a/model.go +++ b/model.go @@ -193,3 +193,7 @@ type NoticeInfo struct { URL string // 通知链接 Date string // 通知日期 } + +type NoticeInfoReq struct { + PageNum int // 获取第几页的数据,从 1 开始 +} diff --git a/notice.go b/notice.go index 2993d29..44215b9 100644 --- a/notice.go +++ b/notice.go @@ -26,15 +26,24 @@ import ( "github.com/west2-online/jwch/constants" ) -func (s *Student) GetNoticeInfo() (list []*NoticeInfo, err error) { - // 获取通知公告 - // 1. 
获取通知公告页面 +func (s *Student) GetNoticeInfo(req *NoticeInfoReq) (list []*NoticeInfo, err error) { + // 获取通知公告页面的总页数 res, err := s.PostWithIdentifier(constants.NoticeInfoQueryURL, map[string]string{}) if err != nil { return nil, err } - // 2. 解析页面 - list, err = parseNoticeInfo(res) + lastPageNum, err := getTotalPages(res) + if err != nil { + return nil, err + } + // 根据总页数计算 url + num := lastPageNum - req.PageNum + 1 + url := fmt.Sprintf("https://jwch.fzu.edu.cn/jxtz/%d.htm", num) + doc, err := s.PostWithIdentifier(url, map[string]string{}) + if err != nil { + return nil, err + } + list, err = parseNoticeInfo(doc) if err != nil { return nil, err } @@ -42,17 +51,16 @@ func (s *Student) GetNoticeInfo() (list []*NoticeInfo, err error) { return list, nil } +// 获取当前页面的所有数据信息 func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) { // 解析通知公告页面 var list []*NoticeInfo - // 修正 XPath 表达式 sel := htmlquery.FindOne(doc, "//div[@class='box-gl clearfix']") if sel == nil { return nil, fmt.Errorf("cannot find the notice list") } - // 查找所有的
<li> 元素 rows := htmlquery.Find(sel, ".//ul[@class='list-gl']/li") for _, row := range rows { @@ -74,9 +82,24 @@ func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) { URL: url, Date: date, } - list = append(list, noticeInfo) } return list, nil } + +// 获取总页数 +func getTotalPages(doc *html.Node) (int, error) { + totalPagesNode := htmlquery.FindOne(doc, "//span[@class='p_pages']//a[@href='jxtz/1.htm']") + if totalPagesNode == nil { + return 0, fmt.Errorf("未找到总页数") + } + + totalPagesStr := htmlquery.InnerText(totalPagesNode) + var totalPages int + _, err := fmt.Sscanf(totalPagesStr, "%d", &totalPages) + if err != nil { + return 0, fmt.Errorf("解析总页数失败: %v", err) + } + return totalPages, nil +} From 9681462dcc15b7392794c49dd1060dc3a2e17a77 Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Tue, 7 Jan 2025 01:49:43 +0800 Subject: [PATCH 3/8] fix getting the first page error --- notice.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/notice.go b/notice.go index 44215b9..7b4b09e 100644 --- a/notice.go +++ b/notice.go @@ -27,11 +27,21 @@ import ( ) func (s *Student) GetNoticeInfo(req *NoticeInfoReq) (list []*NoticeInfo, err error) { + // 获取通知公告页面的总页数 res, err := s.PostWithIdentifier(constants.NoticeInfoQueryURL, map[string]string{}) if err != nil { return nil, err } + // 首页直接爬取 + if req.PageNum == 1 { + list, err = parseNoticeInfo(res) + if err != nil { + return nil, err + } + return list, nil + } + // 分页需要根据页数计算 url lastPageNum, err := getTotalPages(res) if err != nil { return nil, err @@ -82,6 +92,7 @@ func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) { URL: url, Date: date, } + fmt.Println(title) list = append(list, noticeInfo) } From c12bd0351ffa6986effe0d1bd938ef863ba4ff5e Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Tue, 7 Jan 2025 01:52:25 +0800 Subject: [PATCH 4/8] fix getting the first page error --- notice.go | 1 - 1 file changed, 1 deletion(-) diff --git a/notice.go b/notice.go index 
7b4b09e..6774dc4 100644 --- a/notice.go +++ b/notice.go @@ -92,7 +92,6 @@ func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) { URL: url, Date: date, } - fmt.Println(title) list = append(list, noticeInfo) } From f8682385738ad7967f5fc5c59887719c00f17839 Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Tue, 7 Jan 2025 12:30:18 +0800 Subject: [PATCH 5/8] fix code according to ci --- notice.go | 1 - 1 file changed, 1 deletion(-) diff --git a/notice.go b/notice.go index 6774dc4..6de16d3 100644 --- a/notice.go +++ b/notice.go @@ -27,7 +27,6 @@ import ( ) func (s *Student) GetNoticeInfo(req *NoticeInfoReq) (list []*NoticeInfo, err error) { - // 获取通知公告页面的总页数 res, err := s.PostWithIdentifier(constants.NoticeInfoQueryURL, map[string]string{}) if err != nil { From 520b3e3efb71914a27c8a35161a1d4cfbbd7f0cd Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Tue, 7 Jan 2025 17:08:06 +0800 Subject: [PATCH 6/8] add judgement for page num request --- notice.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/notice.go b/notice.go index 6de16d3..d561cb8 100644 --- a/notice.go +++ b/notice.go @@ -45,6 +45,10 @@ func (s *Student) GetNoticeInfo(req *NoticeInfoReq) (list []*NoticeInfo, err err if err != nil { return nil, err } + // 判断是否超出总页数 + if req.PageNum > lastPageNum { + return nil, fmt.Errorf("超出总页数") + } // 根据总页数计算 url num := lastPageNum - req.PageNum + 1 url := fmt.Sprintf("https://jwch.fzu.edu.cn/jxtz/%d.htm", num) From 5d32ab938ceb08225f0c6cc40c02f54cdc1ea7ad Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Fri, 10 Jan 2025 17:44:02 +0800 Subject: [PATCH 7/8] fix according to ci test --- constants/constants.go | 2 +- jwch_test.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/constants/constants.go b/constants/constants.go index 98d0a2f..75762df 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -31,7 +31,7 @@ const ( ExamRoomQueryURL = 
"https://jwcjwxt2.fzu.edu.cn:81/student/xkjg/examination/exam_list.aspx" NoticeInfoQueryURL = "https://jwch.fzu.edu.cn/jxtz.htm" JwchNoticeURLPrefix = "https://jwch.fzu.edu.cn/" - CultivatePlanURL = "https://jwcjwxt2.fzu.edu.cn:81/pyfa/pyjh/pyjh_list.aspx" + CultivatePlanURL = "https://jwcjwxt2.fzu.edu.cn:81/pyfa/pyjh/pyjh_list.aspx" JwchPrefix = "https://jwcjwxt2.fzu.edu.cn:81" JwchReferer = "https://jwcjwxt1.fzu.edu.cn/" diff --git a/jwch_test.go b/jwch_test.go index 31fdb60..f794c8e 100644 --- a/jwch_test.go +++ b/jwch_test.go @@ -284,6 +284,7 @@ func TestGetNoticesInfo(t *testing.T) { t.Error(err) } } + func TestGetCultivatePlan(t *testing.T) { _, err := stu.GetCultivatePlan() if err != nil { From 950c57c88a5b54421ba4a1980fd9d4ebdc8e0015 Mon Sep 17 00:00:00 2001 From: SchwarzSail <1424928981@qq.com> Date: Sat, 11 Jan 2025 20:18:37 +0800 Subject: [PATCH 8/8] fix according to copilot --- jwch_test.go | 5 ++++- notice.go | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/jwch_test.go b/jwch_test.go index f794c8e..08068e1 100644 --- a/jwch_test.go +++ b/jwch_test.go @@ -279,10 +279,13 @@ func TestGetExamRoomInfo(t *testing.T) { } func TestGetNoticesInfo(t *testing.T) { - _, err := stu.GetNoticeInfo(&NoticeInfoReq{PageNum: 2}) + content, err := stu.GetNoticeInfo(&NoticeInfoReq{PageNum: 2}) if err != nil { t.Error(err) } + if content == nil { + t.Error("content is nil") + } } func TestGetCultivatePlan(t *testing.T) { diff --git a/notice.go b/notice.go index d561cb8..0bf0e80 100644 --- a/notice.go +++ b/notice.go @@ -79,6 +79,9 @@ func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) { for _, row := range rows { // 提取日期 dateNode := htmlquery.FindOne(row, ".//span[@class='doclist_time']") + if dateNode == nil { + return nil, fmt.Errorf("cannot find the date") + } date := strings.TrimSpace(htmlquery.InnerText(dateNode)) // 提取标题