您当前的位置: 首页 > 

lootaa

暂无认证

  • 0浏览

    0关注

    68博文

    0收益

  • 0浏览

    0点赞

    0打赏

    0留言

私信
关注
热门博文

获取高校信息

lootaa 发布时间:2022-04-14 01:15:43 ,浏览量:0

说明

开发的系统中,高校信息的作用是给人选的教育经历打上标签,方便筛选优质人才。 传统一些的方式就是985/211,但是现在主流的方式是双一流。尽管如此,用985/211来筛选也存在很大程度上的合理性,所以从多个角度都进行了查询。

数据来源方面,使用了阳光高考网和中国教育在线两个网站。 阳光高考网:https://gaokao.chsi.com.cn/sch/search.do 中国教育在线:https://daxue.eol.cn/ 查询流程: 1 通过阳光高考网选择民办大学,将所有民办大学筛选出来 2 通过中国教育在线筛选出985和211院校 3 查询阳光高考网数据列表,每个学校都判断是否是民办or985/211,打上标签

技术说明

开发语言选择的是Java,pom.xml中引入了jsoup用来网络请求

		
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.12.1</version>
        </dependency>
        
高校表结构设计

直接将数据内容保存即可,单一表

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;

/**
 * JPA entity holding one university record.
 *
 * <p>Yes/no attributes are stored as integers, where 1 means yes and 0 means no.
 * Accessors below are grouped in the same order as the fields.
 */
@Entity
public class University {

	/** Generated primary key. */
	@Id
	@GeneratedValue
	private Long zid;

	/** Creation time; required, at most 19 characters. */
	@Column(length = 19, nullable = false)
	private String createTime;

	/** Last update time; optional, at most 19 characters. */
	@Column(length = 19)
	private String updateTime;

	/** Deletion flag: 1 = deleted, 0 = not deleted. */
	@Column(nullable = false)
	private Integer isDelete;

	/** University name. */
	@Column(length = 300, nullable = false)
	private String name;

	/** Location of the university. */
	@Column(length = 300, nullable = false)
	private String city;

	/** Authority the university is affiliated with. */
	@Column(length = 300, nullable = false)
	private String owner;

	/** Education level offered. */
	@Column(length = 100, nullable = false)
	private String level;

	/** Feature tag such as "985|211", "211" or a private-university (民办) marker; optional. */
	@Column(length = 100)
	private String feature;

	/** First-class university flag: 1 = yes, 0 = no. */
	@Column(nullable = false)
	private Integer leadingUniversity;

	/** First-class disciplines flag: 1 = yes, 0 = no. */
	@Column(nullable = false)
	private Integer leadingDisciplines;

	/** Graduate school flag: 1 = has one, 0 = has none. */
	@Column(nullable = false)
	private Integer institute;

	// ---- identity and bookkeeping ----

	public Long getZid() {
		return zid;
	}

	public void setZid(Long zid) {
		this.zid = zid;
	}

	public String getCreateTime() {
		return createTime;
	}

	public void setCreateTime(String createTime) {
		this.createTime = createTime;
	}

	public String getUpdateTime() {
		return updateTime;
	}

	public void setUpdateTime(String updateTime) {
		this.updateTime = updateTime;
	}

	public Integer getIsDelete() {
		return isDelete;
	}

	public void setIsDelete(Integer isDelete) {
		this.isDelete = isDelete;
	}

	// ---- descriptive attributes ----

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getCity() {
		return city;
	}

	public void setCity(String city) {
		this.city = city;
	}

	public String getOwner() {
		return owner;
	}

	public void setOwner(String owner) {
		this.owner = owner;
	}

	public String getLevel() {
		return level;
	}

	public void setLevel(String level) {
		this.level = level;
	}

	public String getFeature() {
		return feature;
	}

	public void setFeature(String feature) {
		this.feature = feature;
	}

	// ---- yes/no flags ----

	public Integer getLeadingUniversity() {
		return leadingUniversity;
	}

	public void setLeadingUniversity(Integer leadingUniversity) {
		this.leadingUniversity = leadingUniversity;
	}

	public Integer getLeadingDisciplines() {
		return leadingDisciplines;
	}

	public void setLeadingDisciplines(Integer leadingDisciplines) {
		this.leadingDisciplines = leadingDisciplines;
	}

	public Integer getInstitute() {
		return institute;
	}

	public void setInstitute(Integer institute) {
		this.institute = institute;
	}

}

辅助方法

用来判断某个学校是否是985/211,之所以没直接对比学校名称,是考虑到如果某学校是985/211,那么分校也理应是985/211。

	/**
	 * Tells whether {@code name} contains any entry of {@code school} as a substring.
	 *
	 * <p>Substring matching is used instead of equality so that a branch campus whose
	 * name embeds the parent school's name is matched as well.
	 *
	 * <p>Null and empty entries are ignored: every string contains the empty string, so
	 * a single blank scraped cell would otherwise make every school match.
	 *
	 * @param name   school name to test; {@code null} never matches
	 * @param school reference names to look for; {@code null} is treated as empty
	 * @return {@code true} if at least one non-empty entry occurs inside {@code name}
	 */
	private boolean schoolContain(String name, List<String> school) {
		if (name == null || school == null) {
			return false;
		}
		for (String item : school) {
			if (item != null && !item.isEmpty() && name.contains(item)) {
				return true;
			}
		}
		return false;
	}
代码流程

获取985院校列表

		// 985 universities: collect the school names from the first <tbody> on the page.
		String url = "https://daxue.eol.cn/985.shtml";
		Document doc = Jsoup.connect(url).get();
		// Typed as List<String> because the names are later iterated as String.
		List<String> school985 = new ArrayList<>();
		Elements eles = doc.getElementsByTag("tbody").first().getElementsByTag("tr");
		for (Element ele : eles) {
			Elements items = ele.getElementsByTag("td");
			// The name is read from the third cell counted from the end of the row;
			// rows with fewer cells are skipped instead of throwing IndexOutOfBoundsException.
			if (items.size() < 3) {
				continue;
			}
			String schoolName = items.get(items.size() - 3).text();
			// A blank name would match every school in a substring comparison, so drop it.
			if (!schoolName.isEmpty()) {
				school985.add(schoolName);
			}
		}

获取211院校列表

		// 211 universities: collect the school names from the first <tbody> on the page.
		url = "https://daxue.eol.cn/211.shtml";
		doc = Jsoup.connect(url).get();
		// Typed as List<String> because the names are later iterated as String.
		List<String> school211 = new ArrayList<>();
		eles = doc.getElementsByTag("tbody").first().getElementsByTag("tr");
		for (Element ele : eles) {
			Elements items = ele.getElementsByTag("td");
			// The name is read from the third cell counted from the end of the row;
			// rows with fewer cells are skipped instead of throwing IndexOutOfBoundsException.
			if (items.size() < 3) {
				continue;
			}
			String schoolName = items.get(items.size() - 3).text();
			// A blank name would match every school in a substring comparison, so drop it.
			if (!schoolName.isEmpty()) {
				school211.add(schoolName);
			}
		}

获取民办大学列表

		// Private universities: the search is filtered with yxjbz=2.
		url = "https://gaokao.chsi.com.cn/sch/search.do?searchType=1&yxmc=&ssdm=&yxls=&xlcc=&yxjbz=2";
		doc = Jsoup.connect(url).get();
		eles = doc.getElementById("PageForm").getElementsByTag("li");
		// The third-from-last <li> of the pager is parsed as the number of result pages.
		Element ele = eles.get(eles.size() - 3);
		int count = Integer.parseInt(ele.text());
		// Typed as List<String>: the collected values are school names.
		List<String> minbanSchoolName = new ArrayList<>();
		for (int i = 0; i < count; i++) {
			// Each page is addressed by a start offset that grows in steps of 20.
			url = "https://gaokao.chsi.com.cn/sch/search--searchType-1,yxjbz-2,start-" + (i * 20) + ".dhtml";
			doc = Jsoup.connect(url).get();
			eles = doc.getElementsByClass("ch-table").first().getElementsByTag("tr");
			// Row 0 is skipped (presumably the table header; confirm against the page markup).
			for (int j = 1, num = eles.size(); j < num; j++) {
				Elements items = eles.get(j).getElementsByTag("td");
				// Rows without any <td> carry no school name; skip them rather than fail.
				if (items.isEmpty()) {
					continue;
				}
				minbanSchoolName.add(items.get(0).text());
			}
			// Pause between page requests.
			Thread.sleep(2000);
		}

查询高校列表并保存到数据库

		// Walk the unfiltered university listing page by page and persist every row.
		url = "https://gaokao.chsi.com.cn/sch/search.do";
		doc = Jsoup.connect(url).get();
		eles = doc.getElementById("PageForm").getElementsByTag("li");
		// The third-from-last <li> of the pager is parsed as the number of result pages.
		ele = eles.get(eles.size() - 3);
		count = Integer.parseInt(ele.text());
		for (int page = 0; page < count; page++) {
			// Each page is addressed by a start offset that grows in steps of 20.
			url = "https://gaokao.chsi.com.cn/sch/search--start-" + (page * 20) + ".dhtml";
			doc = Jsoup.connect(url).get();
			eles = doc.getElementsByClass("ch-table").first().getElementsByTag("tr");
			// Row 0 is skipped (presumably the table header; confirm against the page markup).
			for (int row = 1, rowCount = eles.size(); row < rowCount; row++) {
				Elements cells = eles.get(row).getElementsByTag("td");

				// Read the cells first, in the same order the values were originally accessed.
				String city = cells.get(1).text();
				int leadingUniversity = StringUtil.isBlank(cells.get(4).text()) ? 0 : 1;
				int leadingDisciplines = StringUtil.isBlank(cells.get(5).text()) ? 0 : 1;
				String schoolName = cells.get(0).text();
				int institute = StringUtil.isBlank(cells.get(6).text()) ? 0 : 1;
				String level = cells.get(3).text();
				String owner = cells.get(2).text();

				University entity = new University();
				entity.setName(schoolName);
				entity.setCity(city);
				entity.setOwner(owner);
				entity.setLevel(level);
				entity.setLeadingUniversity(leadingUniversity);
				entity.setLeadingDisciplines(leadingDisciplines);
				entity.setInstitute(institute);
				entity.setIsDelete(0);
				entity.setCreateTime(Dates.now());

				// Private schools are tagged first; otherwise 985 takes precedence over 211.
				// A school matching none of the lists keeps a null feature.
				if (minbanSchoolName.contains(schoolName)) {
					entity.setFeature("民办");
				} else if (schoolContain(schoolName, school985)) {
					entity.setFeature("985|211");
				} else if (schoolContain(schoolName, school211)) {
					entity.setFeature("211");
				}

				schoolRepository.save(entity);
			}
			// Pause between page requests.
			Thread.sleep(2000);
		}
完整测试类
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import com.lootaa.xcl.basedata.dao.UniversityRepository;
import com.lootaa.xcl.basedata.db.University;
import com.lootaa.xcl.basedata.util.Dates;

/**
 * Spring Boot test that rebuilds the university table from two public websites.
 *
 * <p>Flow: clear the table, scrape the 985 and 211 name lists from daxue.eol.cn,
 * scrape the private-university names from gaokao.chsi.com.cn, then walk the full
 * gaokao.chsi.com.cn listing and save one {@link University} per row, tagged with
 * the matching feature.
 */
@SpringBootTest
class XclBasedataApplicationSchoolTests {

	@Autowired UniversityRepository schoolRepository;

	/**
	 * Reloads all universities.
	 *
	 * <p>Note that the table is emptied before any page is fetched, so a failure
	 * part-way through leaves the table empty or partially filled.
	 *
	 * @throws Exception if a page cannot be fetched or parsed, or the pause is interrupted
	 */
	@Test
	void loadSchool() throws Exception {
		schoolRepository.deleteAll();

		List<String> school985 = loadRankedSchoolNames("https://daxue.eol.cn/985.shtml");
		List<String> school211 = loadRankedSchoolNames("https://daxue.eol.cn/211.shtml");
		List<String> minbanSchoolName = loadMinbanSchoolNames();

		int count = readPageCount("https://gaokao.chsi.com.cn/sch/search.do");
		for (int i = 0; i < count; i++) {
			// Each page is addressed by a start offset that grows in steps of 20.
			Elements eles = loadTableRows("https://gaokao.chsi.com.cn/sch/search--start-" + (i * 20) + ".dhtml");
			// Row 0 is skipped (presumably the table header; confirm against the page markup).
			for (int j = 1, num = eles.size(); j < num; j++) {
				Elements items = eles.get(j).getElementsByTag("td");
				String name = items.get(0).text();

				University school = new University();
				school.setName(name);
				school.setCity(items.get(1).text());
				school.setOwner(items.get(2).text());
				school.setLevel(items.get(3).text());
				school.setLeadingUniversity(flag(items.get(4).text()));
				school.setLeadingDisciplines(flag(items.get(5).text()));
				school.setInstitute(flag(items.get(6).text()));
				school.setIsDelete(0);
				school.setCreateTime(Dates.now());

				// Private schools are tagged first; otherwise 985 takes precedence over 211.
				// A school matching none of the lists keeps a null feature.
				if (minbanSchoolName.contains(name)) {
					school.setFeature("民办");
				} else if (schoolContain(name, school985)) {
					school.setFeature("985|211");
				} else if (schoolContain(name, school211)) {
					school.setFeature("211");
				}
				schoolRepository.save(school);
			}
			// Pause between page requests.
			Thread.sleep(2000);
		}
	}

	/**
	 * Scrapes school names from the first {@code <tbody>} of a daxue.eol.cn list page.
	 *
	 * @param url page to fetch
	 * @return non-blank names, read from the third cell counted from the end of each row
	 * @throws Exception if the page cannot be fetched
	 */
	private List<String> loadRankedSchoolNames(String url) throws Exception {
		Document doc = Jsoup.connect(url).get();
		List<String> names = new ArrayList<>();
		for (Element row : doc.getElementsByTag("tbody").first().getElementsByTag("tr")) {
			Elements items = row.getElementsByTag("td");
			// Rows with fewer than three cells cannot hold the name column; skip them.
			if (items.size() < 3) {
				continue;
			}
			String name = items.get(items.size() - 3).text();
			// A blank name would match every school in a substring comparison, so drop it.
			if (!name.isEmpty()) {
				names.add(name);
			}
		}
		return names;
	}

	/**
	 * Scrapes the names of all private universities (search filter {@code yxjbz=2}).
	 *
	 * @return names taken from the first cell of every data row on every result page
	 * @throws Exception if a page cannot be fetched or parsed, or the pause is interrupted
	 */
	private List<String> loadMinbanSchoolNames() throws Exception {
		int count = readPageCount("https://gaokao.chsi.com.cn/sch/search.do?searchType=1&yxmc=&ssdm=&yxls=&xlcc=&yxjbz=2");
		List<String> names = new ArrayList<>();
		for (int i = 0; i < count; i++) {
			Elements eles = loadTableRows("https://gaokao.chsi.com.cn/sch/search--searchType-1,yxjbz-2,start-" + (i * 20) + ".dhtml");
			for (int j = 1, num = eles.size(); j < num; j++) {
				Elements items = eles.get(j).getElementsByTag("td");
				// Rows without any <td> carry no school name; skip them rather than fail.
				if (!items.isEmpty()) {
					names.add(items.get(0).text());
				}
			}
			// Pause between page requests.
			Thread.sleep(2000);
		}
		return names;
	}

	/**
	 * Reads the number of result pages from the pager of a search page.
	 *
	 * @param url search page to fetch
	 * @return the integer parsed from the third-from-last {@code <li>} inside {@code #PageForm}
	 * @throws Exception if the page cannot be fetched or the pager text is not a number
	 */
	private int readPageCount(String url) throws Exception {
		Document doc = Jsoup.connect(url).get();
		Elements pagerItems = doc.getElementById("PageForm").getElementsByTag("li");
		return Integer.parseInt(pagerItems.get(pagerItems.size() - 3).text());
	}

	/**
	 * Fetches a result page and returns the rows of its first {@code ch-table} element.
	 *
	 * @param url result page to fetch
	 * @return all {@code <tr>} elements of the table, including row 0
	 * @throws Exception if the page cannot be fetched
	 */
	private Elements loadTableRows(String url) throws Exception {
		Document doc = Jsoup.connect(url).get();
		return doc.getElementsByClass("ch-table").first().getElementsByTag("tr");
	}

	/** Maps a table cell to the 1/0 flag convention: blank text is 0, anything else is 1. */
	private static int flag(String cellText) {
		return StringUtil.isBlank(cellText) ? 0 : 1;
	}

	/**
	 * Tells whether {@code name} contains any entry of {@code school} as a substring.
	 *
	 * <p>Substring matching is used instead of equality so that a branch campus whose
	 * name embeds the parent school's name is matched as well. Null and empty entries
	 * are ignored, because every string contains the empty string.
	 *
	 * @param name   school name to test; {@code null} never matches
	 * @param school reference names to look for; {@code null} is treated as empty
	 * @return {@code true} if at least one non-empty entry occurs inside {@code name}
	 */
	private boolean schoolContain(String name, List<String> school) {
		if (name == null || school == null) {
			return false;
		}
		for (String item : school) {
			if (item != null && !item.isEmpty() && name.contains(item)) {
				return true;
			}
		}
		return false;
	}

}
保存的数据

在这里插入图片描述

关注
打赏
1663829960
查看更多评论
立即登录/注册

微信扫码登录

0.0444s