(对应本博客还有一个简化版的,请参看:省市区级联SQL文件)
说明:费了好大的劲把数据从官网上爬下来并导入到MySQL中。国家统计局官网地址:http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/54/5402.html
package com.hc;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.hc.domain.*;
import com.hc.mapper.*;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* 全国省市县镇村数据爬取
*
* @author 梁云亮
*/
@Slf4j
@SpringBootTest
public class InitAdd5Tables {
/**
 * Downloads and parses the HTML page at the given URL.
 *
 * @param url address of the page to fetch; must be non-null and non-empty
 * @return the parsed document, or {@code null} when the request fails with an
 *         {@link IOException}; callers must check for {@code null}
 * @throws IllegalArgumentException if {@code url} is null or empty
 */
private Document connect(String url) {
    if (url == null || url.isEmpty()) {
        throw new IllegalArgumentException("无效的url");
    }
    try {
        return Jsoup.connect(url).timeout(100 * 1000).get();
    } catch (IOException e) {
        // Keep the null-return contract, but report the underlying cause:
        // a timeout or refused connection is not the same as a missing page.
        System.err.println(url + "地址不存在: " + e);
        return null;
    }
}
/**
 * Scrapes the index page and returns every province linked from it.
 *
 * <p>Each entry has the form {@code code*name}: {@code code} is the link's
 * {@code href} with everything from the last dot onwards removed, and
 * {@code name} is the link text.
 *
 * @return one {@code code*name} string per province link, in page order
 * @throws IllegalStateException if the index page could not be fetched
 */
public List<String> getProvinces() {
    String url = "http://localhost:8080/2020/default.htm";
    Document doc = connect(url);
    if (doc == null) {
        throw new IllegalStateException("Failed to load province index page: " + url);
    }
    List<String> res = new ArrayList<>();
    for (Element row : doc.select("tr.provincetr")) { // each table row holds several provinces
        for (Element link : row.select("a")) { // one link per province
            String href = link.attr("href");
            int dot = href.lastIndexOf('.');
            // Tolerate an href without an extension instead of failing in substring().
            String code = dot >= 0 ? href.substring(0, dot) : href;
            res.add(code + "*" + link.text());
        }
    }
    return res;
}
/**
 * Prints every scraped province entry on its own line.
 */
@Test
public void testGetProvince() {
    for (Object entry : getProvinces()) {
        System.out.println(entry);
    }
}
@Resource
private ProvinceMapper provinceMapper;
/**
 * Scrapes all provinces, converts each {@code code*name} entry into a
 * {@link Province}, stores them with one batch insert and prints the value
 * returned by the mapper.
 */
@Test
void insertProvinces() {
    // A typed local lets the loop iterate over String elements.
    List<String> scraped = getProvinces();
    List<Province> list = new ArrayList<>(scraped.size());
    for (String p : scraped) {
        String[] split = p.split("\\*");
        Province province = Province.builder().code(split[0]).name(split[1]).build();
        list.add(province);
    }
    int res = provinceMapper.batchInsert(list);
    System.out.println(res);
}
/**
 * Scrapes the page of one province and returns the cities listed on it.
 *
 * <p>Each entry has the form {@code code*name}, where {@code code} is the
 * first four characters of the code shown in the row.
 *
 * @param provinceCode province code; used as the file name of the page to fetch
 * @return one {@code code*name} string per {@code tr.citytr} row, in page order
 * @throws IllegalStateException if the province page could not be fetched
 */
public List<String> getCitiesByProvince(String provinceCode) {
    String url = "http://localhost:8080/2020/" + provinceCode + ".html";
    Document doc = connect(url);
    if (doc == null) {
        throw new IllegalStateException("Failed to load city page: " + url);
    }
    List<String> res = new ArrayList<>();
    for (Element row : doc.select("tr.citytr")) { // one row per city
        // The combined cell text is split on spaces: first token is the code, second the name.
        String[] split = row.select("td").text().split(" ");
        res.add(split[0].substring(0, 4) + "*" + split[1]);
    }
    return res;
}
/**
 * Prints every city entry scraped for province code "41", one per line.
 */
@Test
public void testGetCitiesByProvince() {
    for (Object entry : getCitiesByProvince("41")) {
        System.out.println(entry);
    }
}
@Resource
private CityMapper cityMapper;
/**
 * Scrapes the cities of every province and inserts them one province at a
 * time, printing the value returned by each batch insert.
 *
 * <p>Each city is linked to the id of the stored {@link Province} whose
 * {@code code} column matches, so the provinces must already be in the database.
 *
 * @throws IllegalStateException if a scraped province has no matching database row
 */
@Test
void insertCities() {
    // Typed locals let the loops iterate over String elements.
    List<String> pList = getProvinces();
    for (String p : pList) {
        String provinceCode = p.split("\\*")[0];
        List<String> cList = getCitiesByProvince(provinceCode);
        Province pp = provinceMapper.selectOne(new QueryWrapper<Province>().eq("code", provinceCode));
        if (pp == null) {
            // Fail with a clear message instead of a NullPointerException on pp.getId().
            throw new IllegalStateException(
                    "Province with code " + provinceCode + " not found; insert provinces first");
        }
        List<City> list = new ArrayList<>(cList.size());
        for (String c : cList) {
            String[] tmp = c.split("\\*");
            City city = City.builder().name(tmp[1]).code(tmp[0]).provinceId(pp.getId()).build();
            list.add(city);
        }
        // Insert province by province.
        int res = cityMapper.batchInsert(list);
        System.out.println(res);
    }
}
/**
 * Scrapes the page of one city and returns the counties listed on it.
 *
 * <p>Each entry has the form {@code code*name}. When the page has
 * {@code tr.countytr} rows, {@code code} is the first six characters of the row's
 * code. When it has none, the {@code tr.towntr} rows are returned instead, with
 * the first nine characters of the code.
 *
 * @param cityCode path of the city page relative to the base URL, without the
 *                 {@code .html} suffix (for example {@code 46/4604})
 * @return one {@code code*name} string per matching row, in page order
 * @throws IllegalStateException if the city page could not be fetched
 */
public List<String> getCountriesByCity(String cityCode) {
    String url = "http://localhost:8080/2020/" + cityCode + ".html";
    Document doc = connect(url);
    if (doc == null) {
        throw new IllegalStateException("Failed to load county page: " + url);
    }
    Elements rows = doc.select("tr.countytr");
    int codeLength = 6;
    if (rows.isEmpty()) {
        // Some cities (e.g. Danzhou in Hainan) have only four levels and no
        // county level; fall back to the town rows and keep a longer code.
        rows = doc.select("tr.towntr");
        codeLength = 9;
    }
    List<String> res = new ArrayList<>();
    for (Element row : rows) {
        // The combined cell text is split on spaces: first token is the code, second the name.
        String[] split = row.select("td").text().split(" ");
        res.add(split[0].substring(0, codeLength) + "*" + split[1]);
    }
    return res;
}
/**
 * Prints every entry scraped for the city page "46/4604", one per line.
 */
@Test
void testGetCountiesByProvince() {
    for (Object entry : getCountriesByCity("46/4604")) {
        System.out.println(entry);
    }
}
@Resource
private CountryMapper countryMapper;
@Test
void insertCountry() {
List pList = getProvinces();
for (int i = 0; i
关注
打赏
最近更新
- 深拷贝和浅拷贝的区别(重点)
- 【Vue】走进Vue框架世界
- 【云服务器】项目部署—搭建网站—vue电商后台管理系统
- 【React介绍】 一文带你深入React
- 【React】React组件实例的三大属性之state,props,refs(你学废了吗)
- 【脚手架VueCLI】从零开始,创建一个VUE项目
- 【React】深入理解React组件生命周期----图文详解(含代码)
- 【React】DOM的Diffing算法是什么?以及DOM中key的作用----经典面试题
- 【React】1_使用React脚手架创建项目步骤--------详解(含项目结构说明)
- 【React】2_如何使用react脚手架写一个简单的页面?