Add ctt (classical test theory) module and fix fa (factor analysis) loadings

Guozhiming97 · Jan 26, 2018 · 55bef09 · 55bef09
1 parent 782b8d3
commit 55bef09
Show file tree

Hide file tree

Showing 6 changed files with 115 additions and 6 deletions.
diff --git a/demo/demo_ctt.py b/demo/demo_ctt.py
@@ -0,0 +1,11 @@
+from __future__ import print_function
+from psy.ctt import Ctt
+import numpy as np
+
+# Load the score matrix from lsat.csv.  np.loadtxt opens and closes the file
+# itself, so no handle is leaked and the Python-2-only ``file`` builtin is
+# not needed.
+score = np.loadtxt('lsat.csv', delimiter=",")
+ctt = Ctt(score)
+# Ctt defines the reliability estimates under these two names.
+print(ctt.get_alpha_reliability())
+print(ctt.get_composite_reliability())
+print(ctt.get_discrimination())
+print(ctt.get_difficulty())
diff --git a/demo/demo_sim_ctt.py b/demo/demo_sim_ctt.py
@@ -0,0 +1,32 @@
+from __future__ import division
+import numpy as np
+
+from psy.ctt import Ctt
+
+# Target reliability of every simulated item.
+r_list = [0.5, 0.5, 0.5, 0.5, 0.5]
+# Number of simulated score rows.
+person_size = 500
+t_list = np.zeros((person_size, len(r_list)))
+x_list = np.zeros((person_size, len(r_list)))
+for i, r in enumerate(r_list):
+    # Dichotomous (0/1) true scores.
+    t = np.random.randint(0, 2, person_size)
+    # Pick the error variance so that var(t) / var(x) equals r.
+    var_t = np.var(t)
+    var_x = var_t / r
+    var_e = var_x - var_t
+    std_e = var_e ** 0.5
+    e = np.random.normal(0, std_e, person_size)
+    # Observed scores: true score plus noise, rounded and clipped to [0, 1].
+    x = np.round(t + e, 0)
+    x[x < 0] = 0
+    x[x > 1] = 1
+    t_list[:, i] = t
+    x_list[:, i] = x
+
+var_tt = np.var(np.sum(t_list, axis=1))
+var_tx = np.var(np.sum(x_list, axis=1))
+np.savetxt('ctt.csv', x_list, delimiter=',')
+# A parenthesised single-argument print behaves the same as a Python 2
+# statement and as a Python 3 function call.
+print(var_tt / var_tx)
+ctt = Ctt(scores=x_list)
+# Ctt defines the reliability estimates under these two names.
+print(ctt.get_alpha_reliability())
+print(ctt.get_composite_reliability())
diff --git a/psy/ctt/__init__.py b/psy/ctt/__init__.py
@@ -0,0 +1 @@
+# Explicit relative import: resolves the sibling module on Python 2 and 3.
+from .ctt import Ctt
diff --git a/psy/ctt/ctt.py b/psy/ctt/ctt.py
@@ -0,0 +1,49 @@
+# coding=utf-8
+from __future__ import division, print_function
+import numpy as np
+
+from psy import Factor
+
+
+class BaseCtt(object):
+    """Reliability estimates of classical test theory (CTT).
+
+    ``scores`` is a two-dimensional score matrix: every column is one item
+    and the sum of a row is that row's total score.
+    """
+
+    def __init__(self, scores):
+        # Accept any array-like (e.g. a list of rows), not only an ndarray.
+        scores = np.asarray(scores)
+        self._scores = scores
+        # Row totals, kept as a column vector so that they broadcast against
+        # the score matrix.
+        self.sum_scores = np.sum(scores, axis=1, keepdims=True)
+        self.item_size = scores.shape[1]
+
+    def get_composite_reliability(self):
+        """Return the composite reliability from a one-factor solution.
+
+        With loadings ``l_i`` the value is
+        ``(sum l_i)**2 / ((sum l_i)**2 - sum(l_i**2) + item_size)``,
+        i.e. each item contributes ``1 - l_i**2`` to the denominator.
+        """
+        f = Factor(self._scores.transpose(), 1)
+        loadings = f.loadings
+        lambda_sum_square = np.sum(loadings) ** 2
+        lambda_square_sum = np.sum(loadings ** 2)
+        return lambda_sum_square / (lambda_sum_square - lambda_square_sum + self.item_size)
+
+    def get_alpha_reliability(self):
+        """Return the alpha reliability coefficient.
+
+        Computed as ``k / (k - 1) * (1 - sum(item variances) / var(total))``
+        with ``k = item_size``.  Raises ``ZeroDivisionError`` when there is
+        only a single item.
+        """
+        scores = self._scores
+        item_size = self.item_size
+        # Variance of every item.
+        items_var = np.var(scores, axis=0)
+        # Sum of the item variances.
+        sum_items_var = np.sum(items_var)
+        # Variance of the total scores.
+        sum_scores_var = np.var(self.sum_scores)
+        return item_size / (item_size - 1) * (1 - sum_items_var / sum_scores_var)
+
+
+class Ctt(BaseCtt):
+    """Item-level statistics on top of the BaseCtt reliability estimates."""
+
+    def get_discrimination(self):
+        """Return, per item, the correlation of item score with total score."""
+        item_scores = self._scores
+        totals = self.sum_scores
+        # Deviations of item scores and total scores from their means.
+        item_dev = item_scores - np.mean(item_scores, axis=0)
+        total_dev = totals - np.mean(totals)
+        # Covariance of every item column with the total score.
+        covariance = np.mean(item_dev * total_dev, axis=0)
+        # Product of the item and total-score standard deviations.
+        spread = np.std(item_scores, axis=0) * np.std(totals)
+        return covariance / spread
+
+    def get_difficulty(self):
+        """Return the mean score of every item."""
+        item_means = np.mean(self._scores, axis=0)
+        return item_means
+
diff --git a/psy/fa/factors.py b/psy/fa/factors.py
@@ -7,10 +7,9 @@ class Factor(object):
 
     # 简单的因子分析，服务于mirt的初值估计
 
-    def __init__(self, scores, factors_num, cov_mat_type='cor'):
+    def __init__(self, scores, factors_num):
+        # scores: raw score data; the callers in this change pass their
+        # score matrix transposed.
+        # factors_num: number of leading eigenvector columns that the
+        # loading properties keep.
         self._scores = scores
         self._factors_num = factors_num
-        self._cov_mat_type = cov_mat_type
 
     @cached_property
     def cor(self):
@@ -23,10 +22,27 @@ def polycor(self):
         # 伪polycor
         return np.abs(self.cor) ** (1 / 1.15) * np.sign(self.cor)
 
+    @property
+    def mirt_loading(self):
+        # Leading eigenvectors of ``self.polycor``, not scaled by their
+        # eigenvalues; one column per requested factor.
+        _, eigenvectors = self._get_eigen(self.polycor)
+        return eigenvectors[:, :self._factors_num]
+
+    @staticmethod
+    def _get_eigen(cov):
+        # Eigen-decompose ``cov`` and return ``(eigenvalues, eigenvectors)``
+        # ordered by descending eigenvalue; eigenvectors are the columns.
+        values, vectors = np.linalg.eig(cov)
+        order = values.argsort()[::-1]
+        eigenvalues = values[order]
+        _eigenvectors = vectors[:, order]
+        # An eigenvector is only defined up to sign, so flip each column to
+        # make its entries sum to a non-negative number.  A column that sums
+        # to exactly zero is left untouched: multiplying it by
+        # ``np.sign(0) == 0`` would wipe the whole eigenvector out.
+        signs = np.sign(np.sum(_eigenvectors, 0))
+        signs[signs == 0] = 1
+        eigenvectors = _eigenvectors * signs
+        return eigenvalues, eigenvectors
+
     @property
     def loadings(self):
         # Factor loadings
-        cov = getattr(self, self._cov_mat_type)
-        score_eig = np.linalg.eig(cov)
-        loadings = -1 * score_eig[1][:, :self._factors_num]
+        # Decompose ``self.cor``; the matrix is no longer selectable through
+        # ``_cov_mat_type``.
+        cov = self.cor
+        score_eig = self._get_eigen(cov)
+        # Scale every eigenvector column by the square root of its
+        # eigenvalue, then keep the first ``_factors_num`` columns.
+        _loadings = score_eig[0] ** 0.5 * score_eig[1]
+        loadings = _loadings[:, :self._factors_num]
         return loadings
diff --git a/psy/mirt/irm.py b/psy/mirt/irm.py
@@ -236,7 +236,7 @@ def _get_init_slop_threshold(self, dim_size):
         # 求初始值
         # 斜率是因子分析后的因子载荷转化
         # 阈值是logistic函数的反函数转化
-        loadings = Factor(self.scores.transpose(), dim_size, 'polycor').loadings
+        loadings = Factor(self.scores.transpose(), dim_size).mirt_loading
         loadings_tr = loadings.transpose()
         d = (1 - np.sum(loadings_tr ** 2, axis=0)) ** 0.5
         init_slop = loadings_tr / d * 1.702