正在显示
7 个修改的文件
包含
50 行增加
和
30 行删除
| @@ -16,8 +16,7 @@ import java.nio.charset.StandardCharsets; | @@ -16,8 +16,7 @@ import java.nio.charset.StandardCharsets; | ||
| 16 | import java.nio.file.Files; | 16 | import java.nio.file.Files; |
| 17 | import java.nio.file.Path; | 17 | import java.nio.file.Path; |
| 18 | import java.nio.file.Paths; | 18 | import java.nio.file.Paths; |
| 19 | -import java.util.List; | ||
| 20 | -import java.util.Map; | 19 | +import java.util.*; |
| 21 | 20 | ||
| 22 | public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionService{ | 21 | public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionService{ |
| 23 | protected final Logger logger = LoggerFactory.getLogger(this.getClass()); | 22 | protected final Logger logger = LoggerFactory.getLogger(this.getClass()); |
| @@ -64,7 +63,7 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS | @@ -64,7 +63,7 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS | ||
| 64 | } | 63 | } |
| 65 | } | 64 | } |
| 66 | 65 | ||
| 67 | - abstract List<FishAquaticPublicOpinion> collect(String day); | 66 | + abstract List<FishAquaticPublicOpinion> collect(String day,Set<String> dedupMap); |
| 68 | abstract String getInfo(String info_url); | 67 | abstract String getInfo(String info_url); |
| 69 | @Override | 68 | @Override |
| 70 | public void run(String day) { | 69 | public void run(String day) { |
| @@ -72,7 +71,9 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS | @@ -72,7 +71,9 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS | ||
| 72 | { | 71 | { |
| 73 | day = DateUtils.getDate(); | 72 | day = DateUtils.getDate(); |
| 74 | } | 73 | } |
| 75 | - List<FishAquaticPublicOpinion> list = collect(day); | 74 | + // 每次采集运行维护一个标题 Set,用于本次采集内去重 |
| 75 | + Set<String> dedupMap = new HashSet<>(); | ||
| 76 | + List<FishAquaticPublicOpinion> list = collect(day,dedupMap); | ||
| 76 | if (null != list && list.size() != 0) | 77 | if (null != list && list.size() != 0) |
| 77 | { | 78 | { |
| 78 | //持久 | 79 | //持久 |
| @@ -21,10 +21,7 @@ import java.io.UnsupportedEncodingException; | @@ -21,10 +21,7 @@ import java.io.UnsupportedEncodingException; | ||
| 21 | import java.net.URL; | 21 | import java.net.URL; |
| 22 | import java.net.URLEncoder; | 22 | import java.net.URLEncoder; |
| 23 | import java.nio.charset.StandardCharsets; | 23 | import java.nio.charset.StandardCharsets; |
| 24 | -import java.util.ArrayList; | ||
| 25 | -import java.util.Date; | ||
| 26 | -import java.util.List; | ||
| 27 | -import java.util.Random; | 24 | +import java.util.*; |
| 28 | 25 | ||
| 29 | /** | 26 | /** |
| 30 | * 微信公众号 | 27 | * 微信公众号 |
| @@ -88,14 +85,14 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -88,14 +85,14 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 88 | 85 | ||
| 89 | } | 86 | } |
| 90 | @Override | 87 | @Override |
| 91 | - List<FishAquaticPublicOpinion> collect(String day) { | 88 | + List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) { |
| 92 | Random rand = new Random(); | 89 | Random rand = new Random(); |
| 93 | 90 | ||
| 94 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); | 91 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); |
| 95 | String[] gzh = new String[]{"中国水产","水产前沿","淡水渔业","海大集团","海洋与渔业杂志","华中渔业研究社","科学养鱼","南京渔业科技","农民日报","水产养殖编辑部","UCN国际海产资讯","养殖前沿","壹渔业","中国渔业报"}; | 92 | String[] gzh = new String[]{"中国水产","水产前沿","淡水渔业","海大集团","海洋与渔业杂志","华中渔业研究社","科学养鱼","南京渔业科技","农民日报","水产养殖编辑部","UCN国际海产资讯","养殖前沿","壹渔业","中国渔业报"}; |
| 96 | for (String name: gzh) | 93 | for (String name: gzh) |
| 97 | { | 94 | { |
| 98 | - List<FishAquaticPublicOpinion> publist = getPublishList(day,name,2); | 95 | + List<FishAquaticPublicOpinion> publist = getPublishList(day,name,2,dedupMap); |
| 99 | if (null != publist && publist.size()!=0) | 96 | if (null != publist && publist.size()!=0) |
| 100 | { | 97 | { |
| 101 | list.addAll(publist); | 98 | list.addAll(publist); |
| @@ -111,7 +108,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -111,7 +108,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 111 | String[] gzh_yj = new String[]{"海大农牧 水产一线","水产养殖网","农财宝典-大国渔业","大唐洋帆","禾晨科技","鱼市场","喜农和集团"}; | 108 | String[] gzh_yj = new String[]{"海大农牧 水产一线","水产养殖网","农财宝典-大国渔业","大唐洋帆","禾晨科技","鱼市场","喜农和集团"}; |
| 112 | for (String name: gzh_yj) | 109 | for (String name: gzh_yj) |
| 113 | { | 110 | { |
| 114 | - List<FishAquaticPublicOpinion> publist = getPublishList(day,name,3); | 111 | + List<FishAquaticPublicOpinion> publist = getPublishList(day,name,3,dedupMap); |
| 115 | if (null != publist && publist.size()!=0) | 112 | if (null != publist && publist.size()!=0) |
| 116 | { | 113 | { |
| 117 | list.addAll(publist); | 114 | list.addAll(publist); |
| @@ -211,7 +208,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -211,7 +208,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 211 | return null; | 208 | return null; |
| 212 | } | 209 | } |
| 213 | 210 | ||
| 214 | - private List<FishAquaticPublicOpinion> getPublishList(String day,String name,Integer aquaticType) | 211 | + private List<FishAquaticPublicOpinion> getPublishList(String day,String name,Integer aquaticType,Set<String> dedupMap) |
| 215 | { | 212 | { |
| 216 | String fakeid = getFakeid(name); | 213 | String fakeid = getFakeid(name); |
| 217 | if (StringUtils.isNotEmpty(fakeid)) | 214 | if (StringUtils.isNotEmpty(fakeid)) |
| @@ -221,7 +218,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -221,7 +218,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 221 | .header("cookie",cookie) | 218 | .header("cookie",cookie) |
| 222 | .execute().body(); | 219 | .execute().body(); |
| 223 | System.out.println(str); | 220 | System.out.println(str); |
| 224 | - return parsePublish(day,str,aquaticType); | 221 | + return parsePublish(day,str,aquaticType,dedupMap); |
| 225 | } | 222 | } |
| 226 | return null; | 223 | return null; |
| 227 | } | 224 | } |
| @@ -230,7 +227,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -230,7 +227,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 230 | * 解析publish返回的消息 | 227 | * 解析publish返回的消息 |
| 231 | * @param str | 228 | * @param str |
| 232 | */ | 229 | */ |
| 233 | - public List<FishAquaticPublicOpinion> parsePublish(String day,String str,Integer aquaticType) | 230 | + public List<FishAquaticPublicOpinion> parsePublish(String day,String str,Integer aquaticType,Set<String> dedupMap) |
| 234 | { | 231 | { |
| 235 | JSONObject jsonObject = JSONObject.parseObject(str); | 232 | JSONObject jsonObject = JSONObject.parseObject(str); |
| 236 | System.out.println(str); | 233 | System.out.println(str); |
| @@ -250,6 +247,10 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -250,6 +247,10 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 250 | if (null != appmsgex && appmsgex.size() != 0) { | 247 | if (null != appmsgex && appmsgex.size() != 0) { |
| 251 | JSONObject appmsg = appmsgex.getJSONObject(0); | 248 | JSONObject appmsg = appmsgex.getJSONObject(0); |
| 252 | String title = appmsg.getString("title"); | 249 | String title = appmsg.getString("title"); |
| 250 | + if(!dedupMap.contains(title)) | ||
| 251 | + { | ||
| 252 | + dedupMap.add(title); | ||
| 253 | + | ||
| 253 | Integer update_time = appmsg.getInteger("update_time"); | 254 | Integer update_time = appmsg.getInteger("update_time"); |
| 254 | Date timeday = new Date(update_time * 1000l); | 255 | Date timeday = new Date(update_time * 1000l); |
| 255 | String time = DateUtils.parseDateToStr(DateUtils.YYYY_MM_DD, timeday); | 256 | String time = DateUtils.parseDateToStr(DateUtils.YYYY_MM_DD, timeday); |
| @@ -265,6 +266,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | @@ -265,6 +266,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ | ||
| 265 | } else { | 266 | } else { |
| 266 | return list; | 267 | return list; |
| 267 | } | 268 | } |
| 269 | + } | ||
| 268 | 270 | ||
| 269 | } | 271 | } |
| 270 | } | 272 | } |
| @@ -12,11 +12,12 @@ import java.text.ParseException; | @@ -12,11 +12,12 @@ import java.text.ParseException; | ||
| 12 | import java.util.ArrayList; | 12 | import java.util.ArrayList; |
| 13 | import java.util.Date; | 13 | import java.util.Date; |
| 14 | import java.util.List; | 14 | import java.util.List; |
| 15 | +import java.util.Set; | ||
| 15 | 16 | ||
| 16 | @Service | 17 | @Service |
| 17 | public class WwwCafsAcCn extends AquaticPublicOpinionBase{ | 18 | public class WwwCafsAcCn extends AquaticPublicOpinionBase{ |
| 18 | @Override | 19 | @Override |
| 19 | - List<FishAquaticPublicOpinion> collect(String day) { | 20 | + List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) { |
| 20 | 21 | ||
| 21 | String[] urls = {"https://www.cafs.ac.cn/kxyj/kyjz.htm","https://www.cafs.ac.cn/gjhz/gjhz.htm","https://www.cafs.ac.cn/djwh/djdt.htm","https://www.cafs.ac.cn/xwxx/tpxw.htm"}; | 22 | String[] urls = {"https://www.cafs.ac.cn/kxyj/kyjz.htm","https://www.cafs.ac.cn/gjhz/gjhz.htm","https://www.cafs.ac.cn/djwh/djdt.htm","https://www.cafs.ac.cn/xwxx/tpxw.htm"}; |
| 22 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); | 23 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); |
| @@ -34,7 +35,9 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{ | @@ -34,7 +35,9 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{ | ||
| 34 | String url = a.attr("abs:href"); | 35 | String url = a.attr("abs:href"); |
| 35 | 36 | ||
| 36 | String title = a.attr("title"); | 37 | String title = a.attr("title"); |
| 37 | - | 38 | + if (!dedupMap.contains(title)) |
| 39 | + { | ||
| 40 | + dedupMap.add(title); | ||
| 38 | String time = li.select("span").text(); | 41 | String time = li.select("span").text(); |
| 39 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, "yyyy年MM月dd日").equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) | 42 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, "yyyy年MM月dd日").equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) |
| 40 | { | 43 | { |
| @@ -48,6 +51,8 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{ | @@ -48,6 +51,8 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{ | ||
| 48 | }else{ | 51 | }else{ |
| 49 | return list; | 52 | return list; |
| 50 | } | 53 | } |
| 54 | + } | ||
| 55 | + | ||
| 51 | 56 | ||
| 52 | } | 57 | } |
| 53 | } catch (Exception e) { | 58 | } catch (Exception e) { |
| @@ -11,20 +11,17 @@ import org.springframework.stereotype.Service; | @@ -11,20 +11,17 @@ import org.springframework.stereotype.Service; | ||
| 11 | import java.util.ArrayList; | 11 | import java.util.ArrayList; |
| 12 | import java.util.Date; | 12 | import java.util.Date; |
| 13 | import java.util.List; | 13 | import java.util.List; |
| 14 | +import java.util.Set; | ||
| 14 | 15 | ||
| 15 | @Service | 16 | @Service |
| 16 | public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ | 17 | public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ |
| 17 | - public static void main(String[] args) { | ||
| 18 | - WwwChinaCfaOrg wwwCsfishOrgCn = new WwwChinaCfaOrg(); | ||
| 19 | - wwwCsfishOrgCn.collect(""); | ||
| 20 | - } | ||
| 21 | /** | 18 | /** |
| 22 | * 中国渔业协会 | 19 | * 中国渔业协会 |
| 23 | * @param day | 20 | * @param day |
| 24 | * @return | 21 | * @return |
| 25 | */ | 22 | */ |
| 26 | @Override | 23 | @Override |
| 27 | - public List<FishAquaticPublicOpinion> collect(String day) | 24 | + public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) |
| 28 | { | 25 | { |
| 29 | String[] urls = {"http://www.china-cfa.org/xwzx/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/hzjl/","http://www.china-cfa.org/hzzx/","http://www.china-cfa.org/jypx/","http://www.china-cfa.org/bzgz/"}; | 26 | String[] urls = {"http://www.china-cfa.org/xwzx/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/hzjl/","http://www.china-cfa.org/hzzx/","http://www.china-cfa.org/jypx/","http://www.china-cfa.org/bzgz/"}; |
| 30 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); | 27 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); |
| @@ -42,7 +39,9 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ | @@ -42,7 +39,9 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ | ||
| 42 | String url = a.attr("abs:href"); | 39 | String url = a.attr("abs:href"); |
| 43 | 40 | ||
| 44 | String title = a.attr("title"); | 41 | String title = a.attr("title"); |
| 45 | - | 42 | + if (!dedupMap.contains(title)) |
| 43 | + { | ||
| 44 | + dedupMap.add(title); | ||
| 46 | String time = li.select("span").text(); | 45 | String time = li.select("span").text(); |
| 47 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) | 46 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) |
| 48 | { | 47 | { |
| @@ -57,6 +56,8 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ | @@ -57,6 +56,8 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ | ||
| 57 | return list; | 56 | return list; |
| 58 | } | 57 | } |
| 59 | } | 58 | } |
| 59 | + | ||
| 60 | + } | ||
| 60 | } catch (Exception e) { | 61 | } catch (Exception e) { |
| 61 | logger.error("数据解析错误:"+domain,e); | 62 | logger.error("数据解析错误:"+domain,e); |
| 62 | } | 63 | } |
| @@ -11,12 +11,13 @@ import org.springframework.stereotype.Service; | @@ -11,12 +11,13 @@ import org.springframework.stereotype.Service; | ||
| 11 | import java.util.ArrayList; | 11 | import java.util.ArrayList; |
| 12 | import java.util.Date; | 12 | import java.util.Date; |
| 13 | import java.util.List; | 13 | import java.util.List; |
| 14 | +import java.util.Set; | ||
| 14 | 15 | ||
| 15 | @Service | 16 | @Service |
| 16 | public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ | 17 | public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ |
| 17 | 18 | ||
| 18 | @Override | 19 | @Override |
| 19 | - List<FishAquaticPublicOpinion> collect(String day) { | 20 | + List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) { |
| 20 | String domain = "http://www.csfish.org.cn/catalog/197"; | 21 | String domain = "http://www.csfish.org.cn/catalog/197"; |
| 21 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); | 22 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); |
| 22 | try { | 23 | try { |
| @@ -31,7 +32,8 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ | @@ -31,7 +32,8 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ | ||
| 31 | String url = a.attr("abs:href"); | 32 | String url = a.attr("abs:href"); |
| 32 | 33 | ||
| 33 | String title = a.text(); | 34 | String title = a.text(); |
| 34 | - | 35 | + if (!dedupMap.contains(title)) { |
| 36 | + dedupMap.add(title); | ||
| 35 | String time = li.select("span").text(); | 37 | String time = li.select("span").text(); |
| 36 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) | 38 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) |
| 37 | { | 39 | { |
| @@ -47,6 +49,8 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ | @@ -47,6 +49,8 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ | ||
| 47 | return list; | 49 | return list; |
| 48 | } | 50 | } |
| 49 | } | 51 | } |
| 52 | + | ||
| 53 | + } | ||
| 50 | } catch (Exception e) { | 54 | } catch (Exception e) { |
| 51 | logger.error("数据解析错误:"+domain,e); | 55 | logger.error("数据解析错误:"+domain,e); |
| 52 | } | 56 | } |
| @@ -23,11 +23,11 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | @@ -23,11 +23,11 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | ||
| 23 | * @return | 23 | * @return |
| 24 | */ | 24 | */ |
| 25 | @Override | 25 | @Override |
| 26 | - public List<FishAquaticPublicOpinion> collect(String day) | 26 | + public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) |
| 27 | { | 27 | { |
| 28 | //先下载规章 | 28 | //先下载规章 |
| 29 | nyncbgzk(); | 29 | nyncbgzk(); |
| 30 | - List<FishAquaticPublicOpinion> list = govpublic(day); | 30 | + List<FishAquaticPublicOpinion> list = govpublic(day,dedupMap); |
| 31 | return list; | 31 | return list; |
| 32 | } | 32 | } |
| 33 | 33 | ||
| @@ -67,7 +67,7 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | @@ -67,7 +67,7 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | ||
| 67 | wwwMoaGovCn.nyncbgzk(); | 67 | wwwMoaGovCn.nyncbgzk(); |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | - private List<FishAquaticPublicOpinion> govpublic(String day) | 70 | + private List<FishAquaticPublicOpinion> govpublic(String day, Set<String> dedupMap) |
| 71 | { | 71 | { |
| 72 | String domain = "https://www.moa.gov.cn/govpublic/"; | 72 | String domain = "https://www.moa.gov.cn/govpublic/"; |
| 73 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); | 73 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); |
| @@ -83,7 +83,8 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | @@ -83,7 +83,8 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | ||
| 83 | String url = a.attr("abs:href"); | 83 | String url = a.attr("abs:href"); |
| 84 | 84 | ||
| 85 | String title = a.attr("title"); | 85 | String title = a.attr("title"); |
| 86 | - | 86 | + if (!dedupMap.contains(title)) { |
| 87 | + dedupMap.add(title); | ||
| 87 | String time = li.select("span").text(); | 88 | String time = li.select("span").text(); |
| 88 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) | 89 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) |
| 89 | { | 90 | { |
| @@ -97,6 +98,8 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | @@ -97,6 +98,8 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ | ||
| 97 | }else { | 98 | }else { |
| 98 | return list; | 99 | return list; |
| 99 | } | 100 | } |
| 101 | + } | ||
| 102 | + | ||
| 100 | 103 | ||
| 101 | } | 104 | } |
| 102 | } catch (Exception e) { | 105 | } catch (Exception e) { |
| @@ -16,6 +16,7 @@ import org.springframework.stereotype.Service; | @@ -16,6 +16,7 @@ import org.springframework.stereotype.Service; | ||
| 16 | import java.util.ArrayList; | 16 | import java.util.ArrayList; |
| 17 | import java.util.Date; | 17 | import java.util.Date; |
| 18 | import java.util.List; | 18 | import java.util.List; |
| 19 | +import java.util.Set; | ||
| 19 | 20 | ||
| 20 | /** | 21 | /** |
| 21 | * 全国水产技术推广总站、中国水产学会 | 22 | * 全国水产技术推广总站、中国水产学会 |
| @@ -29,7 +30,7 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ | @@ -29,7 +30,7 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ | ||
| 29 | * @return | 30 | * @return |
| 30 | */ | 31 | */ |
| 31 | @Override | 32 | @Override |
| 32 | - public List<FishAquaticPublicOpinion> collect(String day) | 33 | + public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) |
| 33 | { | 34 | { |
| 34 | String[] urls = {"http://www.nftec.agri.cn/tzgg/","http://www.nftec.agri.cn/bwzc/","http://www.nftec.agri.cn/zzxhdt/","http://www.nftec.agri.cn/dfgz/","http://www.nftec.agri.cn/zcfg/"}; | 35 | String[] urls = {"http://www.nftec.agri.cn/tzgg/","http://www.nftec.agri.cn/bwzc/","http://www.nftec.agri.cn/zzxhdt/","http://www.nftec.agri.cn/dfgz/","http://www.nftec.agri.cn/zcfg/"}; |
| 35 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); | 36 | List<FishAquaticPublicOpinion> list = new ArrayList<>(); |
| @@ -47,7 +48,8 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ | @@ -47,7 +48,8 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ | ||
| 47 | String url = a.attr("abs:href"); | 48 | String url = a.attr("abs:href"); |
| 48 | 49 | ||
| 49 | String title = a.attr("title"); | 50 | String title = a.attr("title"); |
| 50 | - | 51 | + if (!dedupMap.contains(title)) { |
| 52 | + dedupMap.add(title); | ||
| 51 | String time = li.select("span").text().replace("(","").replace(")",""); | 53 | String time = li.select("span").text().replace("(","").replace(")",""); |
| 52 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) | 54 | if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) |
| 53 | { | 55 | { |
| @@ -61,6 +63,8 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ | @@ -61,6 +63,8 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ | ||
| 61 | }else { | 63 | }else { |
| 62 | return list; | 64 | return list; |
| 63 | } | 65 | } |
| 66 | + } | ||
| 67 | + | ||
| 64 | 68 | ||
| 65 | } | 69 | } |
| 66 | } catch (Exception e) { | 70 | } catch (Exception e) { |
-
请 注册 或 登录 后发表评论