# 百分比加权选择
本文:https://www.yuque.com/mrcode.cn/note-combat/mgxkqm1cvcfhzpgz
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Demonstrates weighted random selection: an entry is picked with a probability
 * proportional to its weight.
 */
public class WeightedRandomSelection {

    /** Shared generator, so a new {@link Random} is not allocated on every call. */
    private static final Random RANDOM = new Random();

    /**
     * Draws ten samples from five entries weighted 0.1 / 0.5 / 0.1 / 0.1 / 0.2 and prints each one.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> data = new ArrayList<>();
        data.add("数据1");
        data.add("数据2");
        data.add("数据3");
        data.add("数据4");
        data.add("数据5");

        List<Double> weights = new ArrayList<>();
        weights.add(0.1);
        weights.add(0.5);
        weights.add(0.1);
        weights.add(0.1);
        weights.add(0.2);

        for (int i = 0; i < 10; i++) {
            String selectedData = weightedRandomChoice(data, weights);
            System.out.println("Selected data: " + selectedData);
        }
    }

    /**
     * Picks one entry of {@code data} at random, with a probability proportional to the weight
     * stored at the same index of {@code weights}.
     *
     * <p>The random threshold is scaled by the sum of the weights, so the weights do not have to
     * add up to 1; only their relative proportions matter. An entry whose weight is 0 is never
     * returned.
     *
     * @param data candidate entries
     * @param weights weight of each candidate, parallel to {@code data}
     * @return the selected entry, or {@code null} when {@code data} is empty
     * @throws IllegalArgumentException if the two lists differ in size, or if the sum of the
     *     weights is not a positive finite number
     */
    public static String weightedRandomChoice(List<String> data, List<Double> weights) {
        if (data.size() != weights.size()) {
            throw new IllegalArgumentException(
                    "data and weights must have the same size: "
                            + data.size() + " vs " + weights.size());
        }
        if (data.isEmpty()) {
            return null;
        }

        double totalWeight = 0.0;
        for (double weight : weights) {
            totalWeight += weight;
        }
        // Written as a negated comparison so that NaN is rejected as well.
        if (!(totalWeight > 0.0) || Double.isInfinite(totalWeight)) {
            throw new IllegalArgumentException(
                    "total weight must be positive and finite: " + totalWeight);
        }

        double threshold = RANDOM.nextDouble() * totalWeight;
        double cumulativeWeight = 0.0;
        int lastWeighted = data.size() - 1;
        for (int i = 0; i < data.size(); i++) {
            double weight = weights.get(i);
            if (weight > 0.0) {
                lastWeighted = i;
            }
            cumulativeWeight += weight;
            // Strict comparison: with ">=" a threshold of exactly 0 would select a leading
            // entry whose weight is 0.
            if (threshold < cumulativeWeight) {
                return data.get(i);
            }
        }
        // Only reachable when rounding makes the threshold equal to the total (extremely small
        // totals); fall back to the last entry that carries a positive weight.
        return data.get(lastWeighted);
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
需要注意的点:
- 权重表示的是相对占比:代码会先累加出 totalWeight,再用随机数乘以它,所以权重总和不必正好是 100%(这里总和是 1);不过总和为 1 时,每个权重就直接等于被选中的概率,更方便核对加权效果
- 数据量太少的话,也有可能加权效果不明显,这里测试的 5 条数据,循环 10 次,加权效果还可以
# 提取成工具
package cn.mrcode.weighted;
import cn.hutool.core.util.RandomUtil;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.function.Function;
/**
 * Weighted random selection helpers: an element is picked with a probability proportional to
 * its weight.
 *
 * @author mrcode
 * @date 2023/11/7 15:38
 */
public class WeightedRandomSelectionUtil {

    /**
     * One-shot selection: extracts a weight from every element and then picks one element.
     *
     * <p>The weights are recomputed on every call. For repeated draws over the same elements,
     * compute the weights once and call {@link #weightedRandomChoice(List, List)} directly.
     *
     * @param datas candidate elements
     * @param weightedValueExtract returns the weight of a candidate
     * @param <T> element type
     * @return the selected element, or {@code null} when {@code datas} is empty
     * @throws IllegalArgumentException if the sum of the extracted weights is not a positive
     *     finite number
     */
    public static <T> T select(List<T> datas, Function<T, Double> weightedValueExtract) {
        List<Double> weightedValues = new ArrayList<>(datas.size());
        for (T item : datas) {
            // add(), not set(): the list starts empty, so set(i, ...) always threw
            // IndexOutOfBoundsException.
            weightedValues.add(weightedValueExtract.apply(item));
        }
        return weightedRandomChoice(datas, weightedValues);
    }

    /**
     * Repeatable selection: picks one element of {@code data} at random, with a probability
     * proportional to the weight stored at the same index of {@code weights}.
     *
     * <p>The random threshold is scaled by the sum of the weights, so the weights do not have to
     * add up to 1; only their relative proportions matter. An element whose weight is 0 is never
     * returned.
     *
     * @param data candidate elements
     * @param weights weight of each candidate, parallel to {@code data}
     * @param <T> element type
     * @return the selected element, or {@code null} when {@code data} is empty
     * @throws IllegalArgumentException if the two lists differ in size, or if the sum of the
     *     weights is not a positive finite number
     */
    public static <T> T weightedRandomChoice(List<T> data, List<Double> weights) {
        if (data.size() != weights.size()) {
            throw new IllegalArgumentException(
                    "data and weights must have the same size: "
                            + data.size() + " vs " + weights.size());
        }
        if (data.isEmpty()) {
            return null;
        }

        double totalWeight = 0.0;
        for (double weight : weights) {
            totalWeight += weight;
        }
        // Written as a negated comparison so that NaN is rejected as well.
        if (!(totalWeight > 0.0) || Double.isInfinite(totalWeight)) {
            throw new IllegalArgumentException(
                    "total weight must be positive and finite: " + totalWeight);
        }

        // JDK per-thread generator; avoids a third-party call for a single nextDouble().
        double threshold =
                java.util.concurrent.ThreadLocalRandom.current().nextDouble() * totalWeight;
        double cumulativeWeight = 0.0;
        int lastWeighted = data.size() - 1;
        for (int i = 0; i < data.size(); i++) {
            double weight = weights.get(i);
            if (weight > 0.0) {
                lastWeighted = i;
            }
            cumulativeWeight += weight;
            // Strict comparison: with ">=" a threshold of exactly 0 would select a leading
            // element whose weight is 0.
            if (threshold < cumulativeWeight) {
                return data.get(i);
            }
        }
        // Only reachable when rounding makes the threshold equal to the total (extremely small
        // totals); fall back to the last element that carries a positive weight.
        return data.get(lastWeighted);
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
测试
/**
 * Manual check: draws ten weighted samples through
 * {@code WeightedRandomSelectionUtil.weightedRandomChoice} and prints each result.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String[] labels = {"数据1", "数据2", "数据3", "数据4", "数据5"};
    double[] shares = {0.1, 0.5, 0.1, 0.1, 0.2};

    // Build the two parallel lists the utility expects.
    List<String> candidates = new ArrayList<>();
    List<Double> ratios = new ArrayList<>();
    for (int idx = 0; idx < labels.length; idx++) {
        candidates.add(labels[idx]);
        ratios.add(shares[idx]);
    }

    int remaining = 10;
    while (remaining-- > 0) {
        String picked = WeightedRandomSelectionUtil.weightedRandomChoice(candidates, ratios);
        System.out.println("Selected data: " + picked);
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
测试输出
Selected data: 数据3
Selected data: 数据2
Selected data: 数据5
Selected data: 数据3
Selected data: 数据3
Selected data: 数据2
Selected data: 数据2
Selected data: 数据5
Selected data: 数据1
Selected data: 数据5
# 再运行一次
Selected data: 数据4
Selected data: 数据2
Selected data: 数据5
Selected data: 数据5
Selected data: 数据4
Selected data: 数据2
Selected data: 数据5
Selected data: 数据2
Selected data: 数据2
Selected data: 数据3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
从结果可以看到,大部分情况下,这个加权是有效果的