From d5f5403d0cfba0f9aa166e690f2fce8b4a1f1f5c Mon Sep 17 00:00:00 2001
From: linrf
Date: Fri, 18 Aug 2023 17:29:17 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E8=87=AA=E5=8A=A8=E5=88=86?=
 =?UTF-8?q?=E6=AE=B5=E5=85=B3=E9=94=AE=E8=AF=8D=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../server/controller/KeyWordStrategy.java    | 82 +++++++++++++++++++
 .../server/controller/SegmentResult.java      | 19 ++++
 .../com/docus/server/service/Service.java     | 44 ++++++++++
 3 files changed, 145 insertions(+)
 create mode 100644 docus-segmentation/src/main/java/com/docus/server/controller/KeyWordStrategy.java
 create mode 100644 docus-segmentation/src/main/java/com/docus/server/controller/SegmentResult.java
 create mode 100644 docus-segmentation/src/main/java/com/docus/server/service/Service.java

diff --git a/docus-segmentation/src/main/java/com/docus/server/controller/KeyWordStrategy.java b/docus-segmentation/src/main/java/com/docus/server/controller/KeyWordStrategy.java
new file mode 100644
index 0000000..92356b3
--- /dev/null
+++ b/docus-segmentation/src/main/java/com/docus/server/controller/KeyWordStrategy.java
@@ -0,0 +1,82 @@
+package com.docus.server.controller;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Keyword-matching rule for one segment, optionally refined by second-level rules.
+ */
+@Data
+public class KeyWordStrategy {
+
+    private String id;
+
+    // Design notes:
+    // - Match with "contains" logic.
+    // - Use the containment ratio; sort by that ratio.
+    // - Decide where matching starts and where it breaks out.
+
+    // Segment id, containment ratio, keyword list.
+
+    private List<String> keys;
+    private Double score;
+    private String segmentId;
+
+    private String segmentName;
+
+    private List<KeyWordStrategy> keyWordStrategyHashMap;
+
+    public KeyWordStrategy(String id, List<String> keys, Double score, String segmentId,
+                           List<KeyWordStrategy> keyWordStrategyHashMap) {
+        this.id = id;
+        this.keys = keys;
+        this.score = score;
+        this.segmentId = segmentId;
+        this.keyWordStrategyHashMap = keyWordStrategyHashMap;
+    }
+
+    /**
+     * Matches {@code keyWord} against this rule's keys.
+     *
+     * <p>A key matches when {@code keyWord} contains it and
+     * {@code key.length() / keyWord.length()} is at least {@code score}. On a match the
+     * second-level rules are tried first; the first one that matches wins, otherwise
+     * this rule's own segment is returned.
+     *
+     * @param keyWord the text to classify
+     * @return the matching segment, or {@code null} when nothing matches or when
+     *         {@code keyWord}, {@code keys} or {@code score} is missing
+     */
+    public SegmentResult isContain(String keyWord) {
+        if (keyWord == null || keyWord.isEmpty() || keys == null || score == null) {
+            return null;
+        }
+        for (String key : keys) {
+            if (!keyWord.contains(key)) {
+                continue;
+            }
+            // Divide as double: int / int truncates every partial match to 0.
+            double ratio = (double) key.length() / keyWord.length();
+            if (ratio < score) {
+                continue;
+            }
+            if (keyWordStrategyHashMap != null) {
+                // Second-level rules exist: return the first one that matches.
+                // NOTE(review): the matched key, not the full keyWord, is passed down;
+                // confirm this is intended.
+                for (KeyWordStrategy secSegment : keyWordStrategyHashMap) {
+                    SegmentResult contain = secSegment.isContain(key);
+                    if (contain != null) {
+                        return contain;
+                    }
+                }
+            }
+            // No second-level rules, or none matched: return the current segment.
+            return new SegmentResult(id, this.segmentId, key, this.score);
+        }
+        // Nothing matched.
+        return null;
+    }
+
+}
diff --git a/docus-segmentation/src/main/java/com/docus/server/controller/SegmentResult.java b/docus-segmentation/src/main/java/com/docus/server/controller/SegmentResult.java
new file mode 100644
index 0000000..9889461
--- /dev/null
+++ b/docus-segmentation/src/main/java/com/docus/server/controller/SegmentResult.java
@@ -0,0 +1,19 @@
+package com.docus.server.controller;
+
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+
+/**
+ * Result of a keyword match: the rule id, the segment id, the key that matched
+ * and the rule's score threshold.
+ */
+@AllArgsConstructor
+@Getter
+public class SegmentResult {
+
+    private String id;
+    private String segmentId;
+    private String keyWord;
+    private Double score;
+
+}
diff --git a/docus-segmentation/src/main/java/com/docus/server/service/Service.java b/docus-segmentation/src/main/java/com/docus/server/service/Service.java
new file mode 100644
index 0000000..50669e1
--- /dev/null
+++ b/docus-segmentation/src/main/java/com/docus/server/service/Service.java
@@ -0,0 +1,44 @@
+package com.docus.server.service;
+
+import com.docus.server.controller.KeyWordStrategy;
+import com.docus.server.controller.SegmentResult;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Looks up the segment for a keyword across the configured keyword rules.
+ */
+public class Service {
+
+    private List<KeyWordStrategy> keyWordStrategyHashMap;
+
+    /**
+     * Resets the rule list to an empty list.
+     */
+    public void init() {
+        // Intended to read the configuration and initialize all segment information;
+        // currently it only creates an empty list.
+        keyWordStrategyHashMap = new ArrayList<>();
+    }
+
+    /**
+     * Returns the first match among the configured rules.
+     *
+     * @param keyword the text to classify
+     * @return the first non-null match, or {@code null} when no rule matches or
+     *         {@link #init()} has not been called
+     */
+    public SegmentResult find(String keyword) {
+        if (keyWordStrategyHashMap == null) {
+            return null;
+        }
+        for (KeyWordStrategy keyWordStrategy : keyWordStrategyHashMap) {
+            SegmentResult result = keyWordStrategy.isContain(keyword);
+            if (result != null) {
+                return result;
+            }
+        }
+        return null;
+    }
+
+}