import numpy as np
import pandas as pd
import csv
# Relative path to the raw-data directory (Windows-style separators).
data_path = "..\\电费敏感预测\\rawdata"

# Work-order information (train / test files).
file_jobinfo_train, file_jobinfo_test = (
    "01_arc_s_95598_wkst_train.tsv",
    "01_arc_s_95598_wkst_test.tsv",
)

# Call-record information.
file_comm = "02_s_comm_rec.tsv"

# Receivable electricity-fee information (train / test files).
file_flow_train, file_flow_test = (
    "09_arc_a_rcvbl_flow.tsv",
    "09_arc_a_rcvbl_flow_test.tsv",
)

# Training set (labels).
file_label = "train_label.csv"

# Test set (rows to predict).
file_test = "test_to_predict.csv"
# Load the "processed_"-prefixed work-order training file (tab-separated).
# csv.QUOTE_NONE stops the parser from treating double quotes that appear
# inside the text fields as quoting characters, which would otherwise raise
# parse errors.
train_info = pd.read_csv(
    # The backslash is escaped explicitly: '\p' is an invalid escape sequence
    # and triggers a warning on current Python versions. The resulting path
    # string is unchanged.
    data_path + '\\processed_' + file_jobinfo_train,
    sep='\t',
    quoting=csv.QUOTE_NONE,
)

# Keep only the rows whose CUST_NO is present; the int64 cast below would
# fail on missing values.
train_info = train_info.loc[train_info.CUST_NO.notnull()]
train_info['CUST_NO'] = train_info.CUST_NO.astype(np.int64)

# Preview the first two rows (only displayed in an interactive session).
train_info.head(2)
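# Source: blog post by 弎见 (author line: "作者:弎见").
# Web-page footer residue from the original post, kept here as a comment so
# the file stays valid Python: 76 original articles, 14 likes, 10k+ views;
# "follow" / "message" / "expand full text" buttons.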