# 百分比加权选择

本文:https://www.yuque.com/mrcode.cn/note-combat/mgxkqm1cvcfhzpgz


import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Demonstrates percentage-weighted random selection: each item is picked with a
 * probability proportional to its weight.
 */
public class WeightedRandomSelection {
    /** Shared generator, so that a new {@link Random} is not allocated on every selection. */
    private static final Random RANDOM = new Random();

    /**
     * Draws ten weighted samples from five labelled items and prints each pick.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> data = new ArrayList<>();
        data.add("数据1");
        data.add("数据2");
        data.add("数据3");
        data.add("数据4");
        data.add("数据5");

        List<Double> weights = new ArrayList<>();
        weights.add(0.1);
        weights.add(0.5);
        weights.add(0.1);
        weights.add(0.1);
        weights.add(0.2);

        for (int i = 0; i < 10; i++) {
            String selectedData = weightedRandomChoice(data, weights);
            System.out.println("Selected data: " + selectedData);
        }

    }

    /**
     * Picks one element of {@code data} at random, where the element at index {@code i}
     * is chosen with probability {@code weights.get(i) / sum(weights)}.
     *
     * <p>The weights do not have to add up to 1; they are normalised by their sum.
     * Elements whose weight is 0 are never returned.
     *
     * @param data    candidate items
     * @param weights weight of each candidate, aligned with {@code data} by index
     * @return the selected item, or {@code null} when there is nothing selectable
     *         (empty input, or every weight is 0)
     * @throws IllegalArgumentException if the two lists differ in size, or a weight is
     *                                  negative, NaN or infinite
     */
    public static String weightedRandomChoice(List<String> data, List<Double> weights) {
        if (data.size() != weights.size()) {
            throw new IllegalArgumentException(
                    "data and weights must have the same size: " + data.size() + " != " + weights.size());
        }

        double totalWeight = 0.0;
        for (double weight : weights) {
            if (Double.isNaN(weight) || Double.isInfinite(weight) || weight < 0.0) {
                throw new IllegalArgumentException("weight must be a finite, non-negative number: " + weight);
            }
            totalWeight += weight;
        }
        if (totalWeight <= 0.0) {
            // Covers both the empty input and the "all weights are zero" case.
            return null;
        }

        // Uniform point in [0, totalWeight); item i owns the half-open slice
        // [cumulative before i, cumulative after i).
        double threshold = RANDOM.nextDouble() * totalWeight;
        double cumulativeWeight = 0.0;
        int lastSelectable = -1;
        for (int i = 0; i < data.size(); i++) {
            double weight = weights.get(i);
            if (weight <= 0.0) {
                continue; // a zero-width slice can never contain the threshold
            }
            cumulativeWeight += weight;
            lastSelectable = i;
            if (threshold < cumulativeWeight) {
                return data.get(i);
            }
        }

        // Only reachable when rounding pushed the threshold up to totalWeight itself;
        // that point belongs to the last item that has a positive weight.
        return data.get(lastSelectable);
    }
}


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

需要注意的点:

  1. 权重总和不必严格等于 100%:代码会先求出 totalWeight,再按 weight / totalWeight 的比例选取。不过把总和配成 100%(这里总和是 1)时,每个权重值就直接等于被选中的概率,加权效果更容易核对
  2. 抽取次数太少的话,随机波动较大,加权效果也可能不明显;这里测试的是 5 条数据、循环抽取 10 次,加权效果还可以

# 提取成工具

package cn.mrcode.weighted;

import cn.hutool.core.util.RandomUtil;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Function;

/**
 * Utility for weighted random selection: each candidate is picked with a probability
 * proportional to its weight.
 *
 * @author mrcode
 * @date 2023/11/7 15:38
 */
public class WeightedRandomSelectionUtil {
    /**
     * One-shot selection: derives each item's weight with {@code weightedValueExtract}
     * and then picks a single item via {@link #weightedRandomChoice(List, List)}.
     *
     * @param datas                candidate items
     * @param weightedValueExtract maps an item to its weight
     * @param <T>                  item type
     * @return the selected item, or {@code null} when there is nothing selectable
     *         (empty input, or every weight is 0)
     * @throws IllegalArgumentException if an extracted weight is negative, NaN or infinite
     */
    public static <T> T select(List<T> datas, Function<T, Double> weightedValueExtract) {
        List<Double> weightedValues = new ArrayList<>(datas.size());
        for (T item : datas) {
            // add(), not set(): the list starts empty, so set(i, ...) would always throw.
            weightedValues.add(weightedValueExtract.apply(item));
        }
        return weightedRandomChoice(datas, weightedValues);
    }

    /**
     * Repeatable selection over precomputed weights: the element at index {@code i} is
     * chosen with probability {@code weights.get(i) / sum(weights)}.
     *
     * <p>The weights do not have to add up to 1; they are normalised by their sum.
     * Elements whose weight is 0 are never returned.
     *
     * @param data    candidate items
     * @param weights weight of each candidate, aligned with {@code data} by index
     * @param <T>     item type
     * @return the selected item, or {@code null} when there is nothing selectable
     *         (empty input, or every weight is 0)
     * @throws IllegalArgumentException if the two lists differ in size, or a weight is
     *                                  negative, NaN or infinite
     */
    public static <T> T weightedRandomChoice(List<T> data, List<Double> weights) {
        if (data.size() != weights.size()) {
            throw new IllegalArgumentException(
                    "data and weights must have the same size: " + data.size() + " != " + weights.size());
        }

        double totalWeight = 0.0;
        for (double weight : weights) {
            if (Double.isNaN(weight) || Double.isInfinite(weight) || weight < 0.0) {
                throw new IllegalArgumentException("weight must be a finite, non-negative number: " + weight);
            }
            totalWeight += weight;
        }
        if (totalWeight <= 0.0) {
            // Covers both the empty input and the "all weights are zero" case.
            return null;
        }

        // Uniform point in [0, totalWeight); item i owns the half-open slice
        // [cumulative before i, cumulative after i).
        double threshold = ThreadLocalRandom.current().nextDouble() * totalWeight;
        double cumulativeWeight = 0.0;
        int lastSelectable = -1;
        for (int i = 0; i < data.size(); i++) {
            double weight = weights.get(i);
            if (weight <= 0.0) {
                continue; // a zero-width slice can never contain the threshold
            }
            cumulativeWeight += weight;
            lastSelectable = i;
            if (threshold < cumulativeWeight) {
                return data.get(i);
            }
        }

        // Only reachable when rounding pushed the threshold up to totalWeight itself;
        // that point belongs to the last item that has a positive weight.
        return data.get(lastSelectable);
    }
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

测试

    /**
     * Demo driver: builds five labelled items with their weights, then prints ten
     * picks made by {@code WeightedRandomSelectionUtil.weightedRandomChoice}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] labels = {"数据1", "数据2", "数据3", "数据4", "数据5"};
        double[] shares = {0.1, 0.5, 0.1, 0.1, 0.2};

        List<String> data = new ArrayList<>();
        List<Double> weights = new ArrayList<>();
        for (int idx = 0; idx < labels.length; idx++) {
            data.add(labels[idx]);
            weights.add(shares[idx]);
        }

        int remaining = 10;
        while (remaining-- > 0) {
            String pick = WeightedRandomSelectionUtil.weightedRandomChoice(data, weights);
            System.out.println("Selected data: " + pick);
        }
    }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

测试输出

Selected data: 数据3
Selected data: 数据2
Selected data: 数据5
Selected data: 数据3
Selected data: 数据3
Selected data: 数据2
Selected data: 数据2
Selected data: 数据5
Selected data: 数据1
Selected data: 数据5

# 再运行一次
Selected data: 数据4
Selected data: 数据2
Selected data: 数据5
Selected data: 数据5
Selected data: 数据4
Selected data: 数据2
Selected data: 数据5
Selected data: 数据2
Selected data: 数据2
Selected data: 数据3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

从结果可以看到,大部分情况下,这个加权是有效果的