What are cross features?
Based on a reading of the code, the cross features here are the features selected to be fed into the _build_extreme_FM model, where a matrix-factorization-style decomposition is performed on them. The feature set is chosen by hand in advance, which fixes exactly which fields take part in the decomposition. The decomposition process itself is what is regarded as the "crossing".
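To make "crossing" concrete, here is a minimal sketch, assuming (purely for illustration, this is not the competition code) that a crossed ID is formed by concatenating two raw categorical values and hashing into a fixed bucket range:

import hashlib

def cross_bucket(val_a, val_b, num_buckets=100000):
    # Map the pair (val_a, val_b) to one stable bucket ID; hashlib is used
    # instead of the built-in hash() so results are reproducible across runs.
    key = (str(val_a) + '&' + str(val_b)).encode('utf-8')
    return int(hashlib.md5(key).hexdigest(), 16) % num_buckets

print(cross_bucket(177, 'male'))   # a single integer ID for the crossed pair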
Where do the cross features come from?
In this competition the cross features are selected from among the features provided directly in the competition data.
The processing code
The code below combines the selected cross features with the multi-value features:
# CIN: combine the multi-value features and the cross features as input to the CIN
if hparams.cross_features is not None:
    self.cross_features = tf.placeholder(shape=(None, len(hparams.cross_features)), dtype=tf.int32)
    self.cross_emb_v2 = tf.get_variable(shape=[hparams.cross_hash_num, hparams.k],
                                        initializer=self.initializer, name='emb_v2_cross')
    emb_inp_v2.append(tf.gather(self.cross_emb_v2, self.cross_features))
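Note that tf.gather here produces a tensor of shape [batch, len(cross_features), k]. For reference, a hedged sketch of how the placeholder might be fed at run time (batch_ids and the bucket count are illustrative, not from the original code); it assumes each row carries one hashed ID per selected cross feature, already reduced modulo hparams.cross_hash_num:

import numpy as np

# Hypothetical batch: 2 samples x 13 cross features (13 matches the feature
# list given later in this note); every entry is a bucket ID in
# [0, hparams.cross_hash_num).
batch_ids = np.random.randint(0, 100000, size=(2, 13), dtype=np.int32)

# Fed together with the other inputs when running the graph, e.g.:
# sess.run(fetches, feed_dict={model.cross_features: batch_ids, ...})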
Next, the combined features are decomposed by the _build_extreme_FM model, and the output of the decomposition is result:
if len(emb_inp_v2) != 0:
    emb_inp_v2 = tf.concat(emb_inp_v2, 1)
    result = self._build_extreme_FM(hparams, emb_inp_v2, res=False, direct=False,
                                    bias=False, reduce_D=False, f_dim=2)
    dnn_input.append(tf.reshape(emb_inp_v2, [-1, hparams.feature_nums * hparams.k]))
    dnn_input.append(result)
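After the concat, emb_inp_v2 has shape [batch, feature_nums, k], which is why it can be both flattened for the DNN and handed to the CIN. A quick shape check with numpy stand-ins (the field counts, batch size and k below are illustrative only):

import numpy as np

batch, k = 4, 16
single_emb = np.zeros((batch, 20, k))  # stand-in: single-value feature embeddings
multi_emb = np.zeros((batch, 5, k))    # stand-in: pooled multi-value embeddings
cross_emb = np.zeros((batch, 13, k))   # stand-in: gathered cross embeddings

emb_inp_v2 = np.concatenate([single_emb, multi_emb, cross_emb], axis=1)
print(emb_inp_v2.shape)                # (4, 38, 16) == [batch, feature_nums, k]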
result is combined with the other features into dnn_input. dnn_input then runs through the code below, the fully connected layers whose forward pass is what training actually optimizes. This result tensor is what this note calls the cross features.
input_size = int(dnn_input.shape[-1])
for idx in range(len(hparams.hidden_size)):
    # dropout is only applied when self.use_norm is true (i.e. at training time)
    dnn_input = tf.cond(self.use_norm,
                        lambda: tf.nn.dropout(dnn_input, 1 - hparams.dropout),
                        lambda: dnn_input)
    # Glorot/Xavier scale: sqrt(2 / (fan_in + fan_out))
    glorot = np.sqrt(2.0 / (input_size + hparams.hidden_size[idx]))
    W = tf.Variable(np.random.normal(loc=0, scale=glorot,
                                     size=(input_size, hparams.hidden_size[idx])),
                    dtype=np.float32)
    dnn_input = tf.tensordot(dnn_input, W, [[-1], [0]])
    dnn_input = tf.nn.relu(dnn_input)
    input_size = hparams.hidden_size[idx]
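The scale passed to np.random.normal is the Glorot/Xavier standard deviation sqrt(2 / (fan_in + fan_out)), which keeps activation variance roughly constant across layers. A one-line sanity check, with illustrative layer widths:

import numpy as np

input_size, hidden = 512, 256          # illustrative widths, not from the source
glorot = np.sqrt(2.0 / (input_size + hidden))
print(round(glorot, 4))                # 0.051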
Finally, the last hidden layer is projected to a single logit as the model output:
glorot = np.sqrt(2.0 / (hparams.hidden_size[-1] + 1))
W = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(hparams.hidden_size[-1], 1)), dtype=np.float32)
logit = tf.tensordot(dnn_input, W, [[-1], [0]])
self.val = logit[:, 0]
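The snippet above stops at the logit; the loss and optimizer live outside this section. As a hedged sketch only, here is how such a logit would typically be wired to a binary cross-entropy loss and a train op in TF1 (self.label, the learning rate and the choice of Adam are assumptions, not taken from the original code):

# Hypothetical training tail (not from the original code)
self.label = tf.placeholder(shape=(None,), dtype=tf.float32)
self.loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label, logits=self.val))
self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.loss)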
Which cross features does this competition involve?
The cross features are the following thirteen fields (a configuration sketch follows the list):
- aid
- gender
- crowd_direction
- delivery_periods
- advertiser
- good_id
- good_type
- ad_type_id
- consuptionAbility
- os
- work
- connectionType
- ad_size
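Putting the list together, a sketch of how these fields would plug into the hyper-parameters (the field names are copied verbatim from the list above; cross_hash_num and k are illustrative values, not from the source):

from types import SimpleNamespace

hparams = SimpleNamespace(
    cross_features=['aid', 'gender', 'crowd_direction', 'delivery_periods',
                    'advertiser', 'good_id', 'good_type', 'ad_type_id',
                    'consuptionAbility', 'os', 'work', 'connectionType',
                    'ad_size'],
    cross_hash_num=100000,  # illustrative bucket count, not from the source
    k=16,                   # illustrative embedding width, not from the source
)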
What is result? What is its size?
To understand result we need to step into the _build_extreme_FM code below. Its core is a stack of len(hparams.cross_layer_sizes) one-dimensional convolutions, curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID'). result is therefore the output of these repeated 1-D convolutions, and its size is fixed by the layer sizes: with direct=False, as in the call above, every cross layer except the last contributes layer_size / 2 rows to the output and the last contributes its full layer_size, giving a tensor of shape [batch_size, final_len]. A worked example follows the function.
def _build_extreme_FM(self, hparams, nn_input, res=False, direct=False, bias=False, reduce_D=False, f_dim=2):
    hidden_nn_layers = []
    field_nums = []
    final_len = 0
    field_num = hparams.feature_nums
    # nn_input: [batch, field_num, k]
    nn_input = tf.reshape(nn_input, shape=[-1, int(field_num), hparams.k])
    field_nums.append(int(field_num))
    hidden_nn_layers.append(nn_input)
    final_result = []
    # k tensors of shape [batch, field_num, 1], one per embedding dimension
    split_tensor0 = tf.split(hidden_nn_layers[0], hparams.k * [1], 2)
    with tf.variable_scope("exfm_part", initializer=self.initializer) as scope:
        for idx, layer_size in enumerate(hparams.cross_layer_sizes):
            split_tensor = tf.split(hidden_nn_layers[-1], hparams.k * [1], 2)
            # pairwise products per embedding dimension:
            # [k, batch, field_nums[0], field_nums[-1]]
            dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True)
            dot_result_o = tf.reshape(dot_result_m, shape=[hparams.k, -1, field_nums[0] * field_nums[-1]])
            # dot_result: [batch, k, field_nums[0] * field_nums[-1]]
            dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])
            if reduce_D:
                # low-rank (rank f_dim) factorization of the filter matrix
                filters0 = tf.get_variable("f0_" + str(idx),
                                           shape=[1, layer_size, field_nums[0], f_dim],
                                           dtype=tf.float32)
                filters_ = tf.get_variable("f__" + str(idx),
                                           shape=[1, layer_size, f_dim, field_nums[-1]],
                                           dtype=tf.float32)
                filters_m = tf.matmul(filters0, filters_)
                filters_o = tf.reshape(filters_m, shape=[1, layer_size, field_nums[0] * field_nums[-1]])
                filters = tf.transpose(filters_o, perm=[0, 2, 1])
            else:
                filters = tf.get_variable(name="f_" + str(idx),
                                          shape=[1, field_nums[-1] * field_nums[0], layer_size],
                                          dtype=tf.float32)
            # dot_result = tf.transpose(dot_result, perm=[0, 2, 1])
            # curr_out: [batch, k, layer_size]
            curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID')
            # BIAS ADD
            if bias:
                b = tf.get_variable(name="f_b" + str(idx),
                                    shape=[layer_size],
                                    dtype=tf.float32,
                                    initializer=tf.zeros_initializer())
                curr_out = tf.nn.bias_add(curr_out, b)
            curr_out = self._activate(curr_out, hparams.cross_activation)
            # curr_out: [batch, layer_size, k]
            curr_out = tf.transpose(curr_out, perm=[0, 2, 1])
            if direct:
                direct_connect = curr_out
                next_hidden = curr_out
                final_len += layer_size
                field_nums.append(int(layer_size))
            else:
                if idx != len(hparams.cross_layer_sizes) - 1:
                    # half goes straight to the output, half feeds the next layer
                    next_hidden, direct_connect = tf.split(curr_out, 2 * [int(layer_size / 2)], 1)
                    final_len += int(layer_size / 2)
                else:
                    direct_connect = curr_out
                    next_hidden = 0
                    final_len += layer_size
                field_nums.append(int(layer_size / 2))
            final_result.append(direct_connect)
            hidden_nn_layers.append(next_hidden)
        # result: [batch, final_len, k] -> sum over k -> [batch, final_len]
        result = tf.concat(final_result, axis=1)
        result = tf.reduce_sum(result, -1)
        return result
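To pin down the size of result, here is a small pure-Python rewrite of just the final_len bookkeeping above, evaluated for an illustrative cross_layer_sizes=[128, 128] (the actual layer sizes live in hparams and are not shown in this section):

def cin_output_len(cross_layer_sizes, direct=False):
    # Replicates the final_len accounting in _build_extreme_FM.
    final_len = 0
    last = len(cross_layer_sizes) - 1
    for idx, layer_size in enumerate(cross_layer_sizes):
        if direct or idx == last:
            final_len += layer_size       # whole layer is a direct connection
        else:
            final_len += layer_size // 2  # half split off, half feeds the next layer
    return final_len

# Illustrative sizes only; the model above is called with direct=False.
print(cin_output_len([128, 128]))  # 64 + 128 = 192, so result is [batch, 192]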