作者 钟来

水产舆情采集海报生成过滤广告时间参数bug

... ... @@ -16,8 +16,7 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.*;
public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionService{
protected final Logger logger = LoggerFactory.getLogger(this.getClass());
... ... @@ -64,7 +63,7 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS
}
}
abstract List<FishAquaticPublicOpinion> collect(String day);
/**
 * Collects the public-opinion entries for the given day from one source.
 *
 * @param day      day to collect; the implementations visible in this change parse it with the
 *                 {@code DateUtils.YYYY_MM_DD} pattern and compare it with each entry's publish date
 * @param dedupMap titles already seen during the current run (a {@code Set}, despite the name);
 *                 {@code run} creates a fresh {@code HashSet} for every invocation, and
 *                 implementations are expected to add each title they handle and to skip titles
 *                 that are already present
 * @return the collected entries; {@code run} checks the result for {@code null} and for emptiness
 *         before processing it further
 */
abstract List<FishAquaticPublicOpinion> collect(String day,Set<String> dedupMap);
/**
 * Source-specific lookup keyed by an entry URL.
 * NOTE(review): presumably returns the detail/article text fetched from {@code info_url};
 * no implementation is visible in this change — confirm against the subclasses.
 *
 * @param info_url URL of the entry
 * @return a string produced by the concrete source implementation
 */
abstract String getInfo(String info_url);
@Override
public void run(String day) {
... ... @@ -72,7 +71,9 @@ public abstract class AquaticPublicOpinionBase implements AquaticPublicOpinionS
{
day = DateUtils.getDate();
}
List<FishAquaticPublicOpinion> list = collect(day);
// 每个地区维护一个 Set 用于去重
Set<String> dedupMap = new HashSet<>();
List<FishAquaticPublicOpinion> list = collect(day,dedupMap);
if (null != list && list.size() != 0)
{
//持久
... ...
... ... @@ -21,10 +21,7 @@ import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.*;
/**
* 微信公众号
... ... @@ -88,14 +85,14 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
}
@Override
List<FishAquaticPublicOpinion> collect(String day) {
List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) {
Random rand = new Random();
List<FishAquaticPublicOpinion> list = new ArrayList<>();
String[] gzh = new String[]{"中国水产","水产前沿","淡水渔业","海大集团","海洋与渔业杂志","华中渔业研究社","科学养鱼","南京渔业科技","农民日报","水产养殖编辑部","UCN国际海产资讯","养殖前沿","壹渔业","中国渔业报"};
for (String name: gzh)
{
List<FishAquaticPublicOpinion> publist = getPublishList(day,name,2);
List<FishAquaticPublicOpinion> publist = getPublishList(day,name,2,dedupMap);
if (null != publist && publist.size()!=0)
{
list.addAll(publist);
... ... @@ -111,7 +108,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
String[] gzh_yj = new String[]{"海大农牧 水产一线","水产养殖网","农财宝典-大国渔业","大唐洋帆","禾晨科技","鱼市场","喜农和集团"};
for (String name: gzh_yj)
{
List<FishAquaticPublicOpinion> publist = getPublishList(day,name,3);
List<FishAquaticPublicOpinion> publist = getPublishList(day,name,3,dedupMap);
if (null != publist && publist.size()!=0)
{
list.addAll(publist);
... ... @@ -211,7 +208,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
return null;
}
private List<FishAquaticPublicOpinion> getPublishList(String day,String name,Integer aquaticType)
private List<FishAquaticPublicOpinion> getPublishList(String day,String name,Integer aquaticType,Set<String> dedupMap)
{
String fakeid = getFakeid(name);
if (StringUtils.isNotEmpty(fakeid))
... ... @@ -221,7 +218,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
.header("cookie",cookie)
.execute().body();
System.out.println(str);
return parsePublish(day,str,aquaticType);
return parsePublish(day,str,aquaticType,dedupMap);
}
return null;
}
... ... @@ -230,7 +227,7 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
* 解析publish返回的消息
* @param str
*/
public List<FishAquaticPublicOpinion> parsePublish(String day,String str,Integer aquaticType)
public List<FishAquaticPublicOpinion> parsePublish(String day,String str,Integer aquaticType,Set<String> dedupMap)
{
JSONObject jsonObject = JSONObject.parseObject(str);
System.out.println(str);
... ... @@ -250,20 +247,25 @@ public class MpWeixinQqCom extends AquaticPublicOpinionBase{
if (null != appmsgex && appmsgex.size() != 0) {
JSONObject appmsg = appmsgex.getJSONObject(0);
String title = appmsg.getString("title");
Integer update_time = appmsg.getInteger("update_time");
Date timeday = new Date(update_time * 1000l);
String time = DateUtils.parseDateToStr(DateUtils.YYYY_MM_DD, timeday);
if (StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day, DateUtils.YYYY_MM_DD))) {
String info_url = appmsg.getString("link");
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(info_url);
aquaticPublicOpinion.setReleaseTime(timeday);
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(aquaticType);
list.add(aquaticPublicOpinion);
} else {
return list;
// Only handle an article whose title has not been seen yet in this run.
// The lookup must use the title: asking the set whether it contains itself is never
// true for a Set<String>, which would let every duplicate through.
if (!dedupMap.contains(title))
{
    dedupMap.add(title);
    // update_time is multiplied by 1000 to build the Date (epoch seconds -> milliseconds).
    Integer update_time = appmsg.getInteger("update_time");
    Date timeday = new Date(update_time * 1000L);
    // Format and re-parse with the day pattern so only the calendar day is compared.
    String time = DateUtils.parseDateToStr(DateUtils.YYYY_MM_DD, timeday);
    if (StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day, DateUtils.YYYY_MM_DD))) {
        String info_url = appmsg.getString("link");
        FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
        aquaticPublicOpinion.setTitle(title);
        aquaticPublicOpinion.setInfoUrl(info_url);
        aquaticPublicOpinion.setReleaseTime(timeday);
        aquaticPublicOpinion.setCreateTime(new Date());
        aquaticPublicOpinion.setAquaticType(aquaticType);
        list.add(aquaticPublicOpinion);
    } else {
        // Not published on the requested day (or no day given): stop and return what was collected so far.
        return list;
    }
}
}
... ...
... ... @@ -12,11 +12,12 @@ import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
@Service
public class WwwCafsAcCn extends AquaticPublicOpinionBase{
@Override
List<FishAquaticPublicOpinion> collect(String day) {
List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) {
String[] urls = {"https://www.cafs.ac.cn/kxyj/kyjz.htm","https://www.cafs.ac.cn/gjhz/gjhz.htm","https://www.cafs.ac.cn/djwh/djdt.htm","https://www.cafs.ac.cn/xwxx/tpxw.htm"};
List<FishAquaticPublicOpinion> list = new ArrayList<>();
... ... @@ -34,21 +35,25 @@ public class WwwCafsAcCn extends AquaticPublicOpinionBase{
String url = a.attr("abs:href");
String title = a.attr("title");
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, "yyyy年MM月dd日").equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
if (!dedupMap.contains(title))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,"yyyy年MM月dd日"));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else{
return list;
dedupMap.add(title);
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time, "yyyy年MM月dd日").equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,"yyyy年MM月dd日"));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else{
return list;
}
}
}
} catch (Exception e) {
logger.error("数据解析错误:"+domain,e);
... ...
... ... @@ -11,20 +11,17 @@ import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
@Service
public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{
public static void main(String[] args) {
WwwChinaCfaOrg wwwCsfishOrgCn = new WwwChinaCfaOrg();
wwwCsfishOrgCn.collect("");
}
/**
* 中国渔业协会
* @param day
* @return
*/
@Override
public List<FishAquaticPublicOpinion> collect(String day)
public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap)
{
String[] urls = {"http://www.china-cfa.org/xwzx/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/tzgg/","http://www.china-cfa.org/hzjl/","http://www.china-cfa.org/hzzx/","http://www.china-cfa.org/jypx/","http://www.china-cfa.org/bzgz/"};
List<FishAquaticPublicOpinion> list = new ArrayList<>();
... ... @@ -42,20 +39,24 @@ public class WwwChinaCfaOrg extends AquaticPublicOpinionBase{
String url = a.attr("abs:href");
String title = a.attr("title");
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
if (!dedupMap.contains(title))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else{
return list;
dedupMap.add(title);
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else{
return list;
}
}
}
} catch (Exception e) {
logger.error("数据解析错误:"+domain,e);
... ...
... ... @@ -11,12 +11,13 @@ import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
@Service
public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{
@Override
List<FishAquaticPublicOpinion> collect(String day) {
List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap) {
String domain = "http://www.csfish.org.cn/catalog/197";
List<FishAquaticPublicOpinion> list = new ArrayList<>();
try {
... ... @@ -31,21 +32,24 @@ public class WwwCsfishOrgCn extends AquaticPublicOpinionBase{
String url = a.attr("abs:href");
String title = a.text();
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
System.out.println("url:"+url+" title:"+title+" time:"+time);
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else {
return list;
if (!dedupMap.contains(title)) {
dedupMap.add(title);
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
System.out.println("url:"+url+" title:"+title+" time:"+time);
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else {
return list;
}
}
}
} catch (Exception e) {
logger.error("数据解析错误:"+domain,e);
... ...
... ... @@ -23,11 +23,11 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{
* @return
*/
@Override
public List<FishAquaticPublicOpinion> collect(String day)
// NOTE(review): this signature uses java.util.Set, but no import change for this file is
// visible in the diff — confirm that Set is already imported here.
public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap)
{
// Download the regulations first
nyncbgzk();
List<FishAquaticPublicOpinion> list = govpublic(day);
// The listing scrape is delegated to govpublic, which receives the day and the shared title set.
List<FishAquaticPublicOpinion> list = govpublic(day,dedupMap);
return list;
}
... ... @@ -67,7 +67,7 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{
wwwMoaGovCn.nyncbgzk();
}
private List<FishAquaticPublicOpinion> govpublic(String day)
private List<FishAquaticPublicOpinion> govpublic(String day, Set<String> dedupMap)
{
String domain = "https://www.moa.gov.cn/govpublic/";
List<FishAquaticPublicOpinion> list = new ArrayList<>();
... ... @@ -83,21 +83,24 @@ public class WwwMoaGovCn extends AquaticPublicOpinionBase{
String url = a.attr("abs:href");
String title = a.attr("title");
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else {
return list;
if (!dedupMap.contains(title)) {
dedupMap.add(title);
String time = li.select("span").text();
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else {
return list;
}
}
}
} catch (Exception e) {
logger.error("数据解析错误:"+domain,e);
... ...
... ... @@ -16,6 +16,7 @@ import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;
/**
* 全国水产技术推广总站、中国水产学会
... ... @@ -29,7 +30,7 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{
* @return
*/
@Override
public List<FishAquaticPublicOpinion> collect(String day)
public List<FishAquaticPublicOpinion> collect(String day, Set<String> dedupMap)
{
String[] urls = {"http://www.nftec.agri.cn/tzgg/","http://www.nftec.agri.cn/bwzc/","http://www.nftec.agri.cn/zzxhdt/","http://www.nftec.agri.cn/dfgz/","http://www.nftec.agri.cn/zcfg/"};
List<FishAquaticPublicOpinion> list = new ArrayList<>();
... ... @@ -47,21 +48,24 @@ public class WwwNftecAgriCn extends AquaticPublicOpinionBase{
String url = a.attr("abs:href");
String title = a.attr("title");
String time = li.select("span").text().replace("(","").replace(")","");
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else {
return list;
if (!dedupMap.contains(title)) {
dedupMap.add(title);
String time = li.select("span").text().replace("(","").replace(")","");
if(StringUtils.isNotEmpty(day) && DateUtils.parseDate(time,DateUtils.YYYY_MM_DD).equals(DateUtils.parseDate(day,DateUtils.YYYY_MM_DD)))
{
FishAquaticPublicOpinion aquaticPublicOpinion = new FishAquaticPublicOpinion();
aquaticPublicOpinion.setTitle(title);
aquaticPublicOpinion.setInfoUrl(url);
aquaticPublicOpinion.setReleaseTime(DateUtils.parseDate(time,DateUtils.YYYY_MM_DD));
aquaticPublicOpinion.setCreateTime(new Date());
aquaticPublicOpinion.setAquaticType(1);
list.add(aquaticPublicOpinion);
}else {
return list;
}
}
}
} catch (Exception e) {
logger.error("数据解析错误:"+domain,e);
... ...