作者 钟来

水产舆情采集海报生成过滤广告时间参数bug

@@ -16,8 +16,7 @@ import java.nio.charset.StandardCharsets; @@ -16,8 +16,7 @@ import java.nio.charset.StandardCharsets;
16 import java.nio.file.Files; 16 import java.nio.file.Files;
17 import java.nio.file.Path; 17 import java.nio.file.Path;
18 import java.nio.file.Paths; 18 import java.nio.file.Paths;
19 -import java.util.List;  
20 -import java.util.Map; 19 +import java.util.*;
21 20
22 public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionService{ 21 public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionService{
23 protected final Logger logger = LoggerFactory.getLogger(this.getClass()); 22 protected final Logger logger = LoggerFactory.getLogger(this.getClass());
@@ -64,7 +63,7 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS @@ -64,7 +63,7 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS
64 } 63 }
65 } 64 }
66 65
67 - abstract List<FishAquaticPublicOpinion> collect(String day); 66 + abstract List<FishAquaticPublicOpinion> collect(String day,Set<String> dedupMap);
68 abstract String getInfo(String info_url); 67 abstract String getInfo(String info_url);
69 @Override 68 @Override
70 public void run(String day) { 69 public void run(String day) {
@@ -72,7 +71,9 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS @@ -72,7 +71,9 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS
72 { 71 {
73 day = DateUtils.getDate(); 72 day = DateUtils.getDate();
74 } 73 }
75 - List<FishAquaticPublicOpinion> list = collect(day); 74 + // 每个地区维护一个 Set 用于去重
  75 + Set<String> dedupMap = new HashSet<>();
  76 + List<FishAquaticPublicOpinion> list = collect(day,dedupMap);
76 if (null != list && list.size() != 0) 77 if (null != list && list.size() != 0)
77 { 78 {
78 //持久 79 //持久
@@ -21,10 +21,7 @@ import java.io.UnsupportedEncodingException; @@ -21,10 +21,7 @@ import java.io.UnsupportedEncodingException;
21 import java.net.URL; 21 import java.net.URL;
22 import java.net.URLEncoder; 22 import java.net.URLEncoder;
23 import java.nio.charset.StandardCharsets; 23 import java.nio.charset.StandardCharsets;
24 -import java.util.ArrayList;  
25 -import java.util.Date;  
26 -import java.util.List;  
27 -import java.util.Random; 24 +import java.util.*;
28 25
29 /** 26 /**
30 * 微信公众号 27 * 微信公众号
@@ -88,14 +85,14 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ @@ -88,14 +85,14 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
88 85
89 } 86 }
90 @Override 87 @Override
91 - List<FishAquaticPublicOpinion> collect(String day) { 88 + List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) {
92 Random rand = new Random(); 89 Random rand = new Random();
93 90
94 List<FishAquaticPublicOpinion> list = new ArrayList<>(); 91 List<FishAquaticPublicOpinion> list = new ArrayList<>();
95 String[] gzh = new String[]{"中国水产","水产前沿","淡水渔业","海大集团","海洋与渔业杂志","华中渔业研究社","科学养鱼","南京渔业科技","农民日报","水产养殖编辑部","UCN国际海产资讯","养殖前沿","壹渔业","中国渔业报"}; 92 String[] gzh = new String[]{"中国水产","水产前沿","淡水渔业","海大集团","海洋与渔业杂志","华中渔业研究社","科学养鱼","南京渔业科技","农民日报","水产养殖编辑部","UCN国际海产资讯","养殖前沿","壹渔业","中国渔业报"};
96 for (String name: gzh) 93 for (String name: gzh)
97 { 94 {
98 - List<FishAquaticPublicOpinion> publist = getPublishList(day,name,2); 95 + List<FishAquaticPublicOpinion> publist = getPublishList(day,name,2,dedupMap);
99 if (null != publist && publist.size()!=0) 96 if (null != publist && publist.size()!=0)
100 { 97 {
101 list.addAll(publist); 98 list.addAll(publist);
@@ -111,7 +108,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ @@ -111,7 +108,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
111 String[] gzh_yj = new String[]{"海大农牧 水产一线","水产养殖网","农财宝典-大国渔业","大唐洋帆","禾晨科技","鱼市场","喜农和集团"}; 108 String[] gzh_yj = new String[]{"海大农牧 水产一线","水产养殖网","农财宝典-大国渔业","大唐洋帆","禾晨科技","鱼市场","喜农和集团"};
112 for (String name: gzh_yj) 109 for (String name: gzh_yj)
113 { 110 {
114 - List<FishAquaticPublicOpinion> publist = getPublishList(day,name,3); 111 + List<FishAquaticPublicOpinion> publist = getPublishList(day,name,3,dedupMap);
115 if (null != publist && publist.size()!=0) 112 if (null != publist && publist.size()!=0)
116 { 113 {
117 list.addAll(publist); 114 list.addAll(publist);
@@ -211,7 +208,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ @@ -211,7 +208,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
211 return null; 208 return null;
212 } 209 }
213 210
214 - private List<FishAquaticPublicOpinion> getPublishList(String day,String name,Integer aquaticType) 211 + private List<FishAquaticPublicOpinion> getPublishList(String day,String name,Integer aquaticType,Set<String> dedupMap)
215 { 212 {
216 String fakeid = getFakeid(name); 213 String fakeid = getFakeid(name);
217 if (StringUtils.isNotEmpty(fakeid)) 214 if (StringUtils.isNotEmpty(fakeid))
@@ -221,7 +218,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ @@ -221,7 +218,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
221 .header("cookie",cookie) 218 .header("cookie",cookie)
222 .execute().body(); 219 .execute().body();
223 System.out.println(str); 220 System.out.println(str);
224 - return parsePublish(day,str,aquaticType); 221 + return parsePublish(day,str,aquaticType,dedupMap);
225 } 222 }
226 return null; 223 return null;
227 } 224 }
@@ -230,7 +227,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ @@ -230,7 +227,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
230 * 解析publish返回的消息 227 * 解析publish返回的消息
231 * @param str 228 * @param str
232 */ 229 */
233 - public List<FishAquaticPublicOpinion> parsePublish(String day,String str,Integer aquaticType) 230 + public List<FishAquaticPublicOpinion> parsePublish(String day,String str,Integer aquaticType,Set<String> dedupMap)
234 { 231 {
235 JSONObject jsonObject = JSONObject.parseObject(str); 232 JSONObject jsonObject = JSONObject.parseObject(str);
236 System.out.println(str); 233 System.out.println(str);
@@ -250,20 +247,25 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{ @@ -250,20 +247,25 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
250 if (null != appmsgex && appmsgex.size() != 0) { 247 if (null != appmsgex && appmsgex.size() != 0) {
251 JSONObject appmsg = appmsgex.getJSONObject(0); 248 JSONObject appmsg = appmsgex.getJSONObject(0);
252 String title = appmsg.getString("title"); 249 String title = appmsg.getString("title");
253 - Integer update_time = appmsg.getInteger("update_time");  
254 - Date timeday = new Date(update_time * 1000l);  
255 - String time = DateUtils.parseDateToStr(DateUtils.YYYY_MM_DD, timeday);  
256 - if (StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day, DateUtils.YYYY_MM_DD))) {  
257 - String info_url = appmsg.getString("link");  
258 - FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();  
259 - aquaticPublicOpinion.setTitle(title);  
260 - aquaticPublicOpinion.setInfoUrl(info_url);  
261 - aquaticPublicOpinion.setReleaseTime(timeday);  
262 - aquaticPublicOpinion.setCreateTime(new Date());  
263 - aquaticPublicOpinion.setAquaticType(aquaticType);  
264 - list.add(aquaticPublicOpinion);  
265 - } else {  
266 - return list; 250 + if(!dedupMap.contains(title))
  251 + {
  252 + dedupMap.add(title);
  253 +
  254 + Integer update_time = appmsg.getInteger("update_time");
  255 + Date timeday = new Date(update_time * 1000l);
  256 + String time = DateUtils.parseDateToStr(DateUtils.YYYY_MM_DD, timeday);
  257 + if (StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day, DateUtils.YYYY_MM_DD))) {
  258 + String info_url = appmsg.getString("link");
  259 + FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
  260 + aquaticPublicOpinion.setTitle(title);
  261 + aquaticPublicOpinion.setInfoUrl(info_url);
  262 + aquaticPublicOpinion.setReleaseTime(timeday);
  263 + aquaticPublicOpinion.setCreateTime(new Date());
  264 + aquaticPublicOpinion.setAquaticType(aquaticType);
  265 + list.add(aquaticPublicOpinion);
  266 + } else {
  267 + return list;
  268 + }
267 } 269 }
268 270
269 } 271 }
@@ -12,11 +12,12 @@ import java.text.ParseException; @@ -12,11 +12,12 @@ import java.text.ParseException;
12 import java.util.ArrayList; 12 import java.util.ArrayList;
13 import java.util.Date; 13 import java.util.Date;
14 import java.util.List; 14 import java.util.List;
  15 +import java.util.Set;
15 16
16 @Service 17 @Service
17 public class WwwCafsAcCn extends AquaticPublicOpinionBase{ 18 public class WwwCafsAcCn extends AquaticPublicOpinionBase{
18 @Override 19 @Override
19 - List<FishAquaticPublicOpinion> collect(String day) { 20 + List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) {
20 21
21 String[] urls = {"https://www.cafs.ac.cn/kxyj/kyjz.htm","https://www.cafs.ac.cn/gjhz/gjhz.htm","https://www.cafs.ac.cn/djwh/djdt.htm","https://www.cafs.ac.cn/xwxx/tpxw.htm"}; 22 String[] urls = {"https://www.cafs.ac.cn/kxyj/kyjz.htm","https://www.cafs.ac.cn/gjhz/gjhz.htm","https://www.cafs.ac.cn/djwh/djdt.htm","https://www.cafs.ac.cn/xwxx/tpxw.htm"};
22 List<FishAquaticPublicOpinion> list = new ArrayList<>(); 23 List<FishAquaticPublicOpinion> list = new ArrayList<>();
@@ -34,21 +35,25 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{ @@ -34,21 +35,25 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{
34 String url = a.attr("abs:href"); 35 String url = a.attr("abs:href");
35 36
36 String title = a.attr("title"); 37 String title = a.attr("title");
37 -  
38 - String time = li.select("span").text();  
39 - if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, "yyyy年MM月dd日").equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) 38 + if (!dedupMap.contains(title))
40 { 39 {
41 - FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();  
42 - aquaticPublicOpinion.setTitle(title);  
43 - aquaticPublicOpinion.setInfoUrl(url);  
44 - aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,"yyyy年MM月dd日"));  
45 - aquaticPublicOpinion.setCreateTime(new Date());  
46 - aquaticPublicOpinion.setAquaticType(1);  
47 - list.add(aquaticPublicOpinion);  
48 - }else{  
49 - return list; 40 + dedupMap.add(title);
  41 + String time = li.select("span").text();
  42 + if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, "yyyy年MM月dd日").equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
  43 + {
  44 + FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
  45 + aquaticPublicOpinion.setTitle(title);
  46 + aquaticPublicOpinion.setInfoUrl(url);
  47 + aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,"yyyy年MM月dd日"));
  48 + aquaticPublicOpinion.setCreateTime(new Date());
  49 + aquaticPublicOpinion.setAquaticType(1);
  50 + list.add(aquaticPublicOpinion);
  51 + }else{
  52 + return list;
  53 + }
50 } 54 }
51 55
  56 +
52 } 57 }
53 } catch (Exception e) { 58 } catch (Exception e) {
54 logger.error("数据解析错误:"+domain,e); 59 logger.error("数据解析错误:"+domain,e);
@@ -11,20 +11,17 @@ import org.springframework.stereotype.Service; @@ -11,20 +11,17 @@ import org.springframework.stereotype.Service;
11 import java.util.ArrayList; 11 import java.util.ArrayList;
12 import java.util.Date; 12 import java.util.Date;
13 import java.util.List; 13 import java.util.List;
  14 +import java.util.Set;
14 15
15 @Service 16 @Service
16 public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ 17 public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{
17 - public static void main(String[] args) {  
18 - WwwChinaCfaOrg wwwCsfishOrgCn = new WwwChinaCfaOrg();  
19 - wwwCsfishOrgCn.collect("");  
20 - }  
21 /** 18 /**
22 * 中国渔业协会 19 * 中国渔业协会
23 * @param day 20 * @param day
24 * @return 21 * @return
25 */ 22 */
26 @Override 23 @Override
27 - public List<FishAquaticPublicOpinion> collect(String day) 24 + public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap)
28 { 25 {
29 String[] urls = {"http://www.china-cfa.org/xwzx/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/hzjl/","http://www.china-cfa.org/hzzx/","http://www.china-cfa.org/jypx/","http://www.china-cfa.org/bzgz/"}; 26 String[] urls = {"http://www.china-cfa.org/xwzx/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/hzjl/","http://www.china-cfa.org/hzzx/","http://www.china-cfa.org/jypx/","http://www.china-cfa.org/bzgz/"};
30 List<FishAquaticPublicOpinion> list = new ArrayList<>(); 27 List<FishAquaticPublicOpinion> list = new ArrayList<>();
@@ -42,20 +39,24 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{ @@ -42,20 +39,24 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{
42 String url = a.attr("abs:href"); 39 String url = a.attr("abs:href");
43 40
44 String title = a.attr("title"); 41 String title = a.attr("title");
45 -  
46 - String time = li.select("span").text();  
47 - if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD))) 42 + if (!dedupMap.contains(title))
48 { 43 {
49 - FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();  
50 - aquaticPublicOpinion.setTitle(title);  
51 - aquaticPublicOpinion.setInfoUrl(url);  
52 - aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));  
53 - aquaticPublicOpinion.setCreateTime(new Date());  
54 - aquaticPublicOpinion.setAquaticType(1);  
55 - list.add(aquaticPublicOpinion);  
56 - }else{  
57 - return list; 44 + dedupMap.add(title);
  45 + String time = li.select("span").text();
  46 + if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
  47 + {
  48 + FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
  49 + aquaticPublicOpinion.setTitle(title);
  50 + aquaticPublicOpinion.setInfoUrl(url);
  51 + aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
  52 + aquaticPublicOpinion.setCreateTime(new Date());
  53 + aquaticPublicOpinion.setAquaticType(1);
  54 + list.add(aquaticPublicOpinion);
  55 + }else{
  56 + return list;
  57 + }
58 } 58 }
  59 +
59 } 60 }
60 } catch (Exception e) { 61 } catch (Exception e) {
61 logger.error("数据解析错误:"+domain,e); 62 logger.error("数据解析错误:"+domain,e);
@@ -11,12 +11,13 @@ import org.springframework.stereotype.Service; @@ -11,12 +11,13 @@ import org.springframework.stereotype.Service;
11 import java.util.ArrayList; 11 import java.util.ArrayList;
12 import java.util.Date; 12 import java.util.Date;
13 import java.util.List; 13 import java.util.List;
  14 +import java.util.Set;
14 15
15 @Service 16 @Service
16 public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ 17 public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{
17 18
18 @Override 19 @Override
19 - List<FishAquaticPublicOpinion> collect(String day) { 20 + List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) {
20 String domain = "http://www.csfish.org.cn/catalog/197"; 21 String domain = "http://www.csfish.org.cn/catalog/197";
21 List<FishAquaticPublicOpinion> list = new ArrayList<>(); 22 List<FishAquaticPublicOpinion> list = new ArrayList<>();
22 try { 23 try {
@@ -31,21 +32,24 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{ @@ -31,21 +32,24 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{
31 String url = a.attr("abs:href"); 32 String url = a.attr("abs:href");
32 33
33 String title = a.text(); 34 String title = a.text();
34 -  
35 - String time = li.select("span").text();  
36 - if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))  
37 - {  
38 - System.out.println("url:"+url+" title:"+title+" time:"+time);  
39 - FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();  
40 - aquaticPublicOpinion.setTitle(title);  
41 - aquaticPublicOpinion.setInfoUrl(url);  
42 - aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));  
43 - aquaticPublicOpinion.setCreateTime(new Date());  
44 - aquaticPublicOpinion.setAquaticType(1);  
45 - list.add(aquaticPublicOpinion);  
46 - }else {  
47 - return list; 35 + if (!dedupMap.contains(title)) {
  36 + dedupMap.add(title);
  37 + String time = li.select("span").text();
  38 + if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
  39 + {
  40 + System.out.println("url:"+url+" title:"+title+" time:"+time);
  41 + FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
  42 + aquaticPublicOpinion.setTitle(title);
  43 + aquaticPublicOpinion.setInfoUrl(url);
  44 + aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
  45 + aquaticPublicOpinion.setCreateTime(new Date());
  46 + aquaticPublicOpinion.setAquaticType(1);
  47 + list.add(aquaticPublicOpinion);
  48 + }else {
  49 + return list;
  50 + }
48 } 51 }
  52 +
49 } 53 }
50 } catch (Exception e) { 54 } catch (Exception e) {
51 logger.error("数据解析错误:"+domain,e); 55 logger.error("数据解析错误:"+domain,e);
@@ -23,11 +23,11 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ @@ -23,11 +23,11 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{
23 * @return 23 * @return
24 */ 24 */
25 @Override 25 @Override
26 - public List<FishAquaticPublicOpinion> collect(String day) 26 + public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap)
27 { 27 {
28 //先下载规章 28 //先下载规章
29 nyncbgzk(); 29 nyncbgzk();
30 - List<FishAquaticPublicOpinion> list = govpublic(day); 30 + List<FishAquaticPublicOpinion> list = govpublic(day,dedupMap);
31 return list; 31 return list;
32 } 32 }
33 33
@@ -67,7 +67,7 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ @@ -67,7 +67,7 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{
67 wwwMoaGovCn.nyncbgzk(); 67 wwwMoaGovCn.nyncbgzk();
68 } 68 }
69 69
70 - private List<FishAquaticPublicOpinion> govpublic(String day) 70 + private List<FishAquaticPublicOpinion> govpublic(String day, Set<String> dedupMap)
71 { 71 {
72 String domain = "https://www.moa.gov.cn/govpublic/"; 72 String domain = "https://www.moa.gov.cn/govpublic/";
73 List<FishAquaticPublicOpinion> list = new ArrayList<>(); 73 List<FishAquaticPublicOpinion> list = new ArrayList<>();
@@ -83,21 +83,24 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{ @@ -83,21 +83,24 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{
83 String url = a.attr("abs:href"); 83 String url = a.attr("abs:href");
84 84
85 String title = a.attr("title"); 85 String title = a.attr("title");
86 -  
87 - String time = li.select("span").text();  
88 - if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))  
89 - {  
90 - FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();  
91 - aquaticPublicOpinion.setTitle(title);  
92 - aquaticPublicOpinion.setInfoUrl(url);  
93 - aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));  
94 - aquaticPublicOpinion.setCreateTime(new Date());  
95 - aquaticPublicOpinion.setAquaticType(1);  
96 - list.add(aquaticPublicOpinion);  
97 - }else {  
98 - return list; 86 + if (!dedupMap.contains(title)) {
  87 + dedupMap.add(title);
  88 + String time = li.select("span").text();
  89 + if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
  90 + {
  91 + FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
  92 + aquaticPublicOpinion.setTitle(title);
  93 + aquaticPublicOpinion.setInfoUrl(url);
  94 + aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
  95 + aquaticPublicOpinion.setCreateTime(new Date());
  96 + aquaticPublicOpinion.setAquaticType(1);
  97 + list.add(aquaticPublicOpinion);
  98 + }else {
  99 + return list;
  100 + }
99 } 101 }
100 102
  103 +
101 } 104 }
102 } catch (Exception e) { 105 } catch (Exception e) {
103 logger.error("数据解析错误:"+domain,e); 106 logger.error("数据解析错误:"+domain,e);
@@ -16,6 +16,7 @@ import org.springframework.stereotype.Service; @@ -16,6 +16,7 @@ import org.springframework.stereotype.Service;
16 import java.util.ArrayList; 16 import java.util.ArrayList;
17 import java.util.Date; 17 import java.util.Date;
18 import java.util.List; 18 import java.util.List;
  19 +import java.util.Set;
19 20
20 /** 21 /**
21 * 全国水产技术推广总站、中国水产学会 22 * 全国水产技术推广总站、中国水产学会
@@ -29,7 +30,7 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ @@ -29,7 +30,7 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{
29 * @return 30 * @return
30 */ 31 */
31 @Override 32 @Override
32 - public List<FishAquaticPublicOpinion> collect(String day) 33 + public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap)
33 { 34 {
34 String[] urls = {"http://www.nftec.agri.cn/tzgg/","http://www.nftec.agri.cn/bwzc/","http://www.nftec.agri.cn/zzxhdt/","http://www.nftec.agri.cn/dfgz/","http://www.nftec.agri.cn/zcfg/"}; 35 String[] urls = {"http://www.nftec.agri.cn/tzgg/","http://www.nftec.agri.cn/bwzc/","http://www.nftec.agri.cn/zzxhdt/","http://www.nftec.agri.cn/dfgz/","http://www.nftec.agri.cn/zcfg/"};
35 List<FishAquaticPublicOpinion> list = new ArrayList<>(); 36 List<FishAquaticPublicOpinion> list = new ArrayList<>();
@@ -47,21 +48,24 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{ @@ -47,21 +48,24 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{
47 String url = a.attr("abs:href"); 48 String url = a.attr("abs:href");
48 49
49 String title = a.attr("title"); 50 String title = a.attr("title");
50 -  
51 - String time = li.select("span").text().replace("(","").replace(")","");  
52 - if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))  
53 - {  
54 - FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();  
55 - aquaticPublicOpinion.setTitle(title);  
56 - aquaticPublicOpinion.setInfoUrl(url);  
57 - aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));  
58 - aquaticPublicOpinion.setCreateTime(new Date());  
59 - aquaticPublicOpinion.setAquaticType(1);  
60 - list.add(aquaticPublicOpinion);  
61 - }else {  
62 - return list; 51 + if (!dedupMap.contains(title)) {
  52 + dedupMap.add(title);
  53 + String time = li.select("span").text().replace("(","").replace(")","");
  54 + if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
  55 + {
  56 + FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
  57 + aquaticPublicOpinion.setTitle(title);
  58 + aquaticPublicOpinion.setInfoUrl(url);
  59 + aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
  60 + aquaticPublicOpinion.setCreateTime(new Date());
  61 + aquaticPublicOpinion.setAquaticType(1);
  62 + list.add(aquaticPublicOpinion);
  63 + }else {
  64 + return list;
  65 + }
63 } 66 }
64 67
  68 +
65 } 69 }
66 } catch (Exception e) { 70 } catch (Exception e) {
67 logger.error("数据解析错误:"+domain,e); 71 logger.error("数据解析错误:"+domain,e);