Add logic for writing data to CSV files

mingsheng.li 2025-07-31 17:37:21 +08:00
parent 7fcc377a27
commit 6fe643638e


@@ -1,4 +1,5 @@
 import os
+import csv
 import pandas as pd
 from dbfread import DBF
 from itertools import islice
@@ -13,11 +14,11 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
     def __init__(self, config):
         super().__init__(config)
         # todo: enable for local debugging
-        # self.data_root = 'D:\disney_test'
-        # self.mapping_file = 'D:\disney_test\disney-mapping.xlsx'
+        self.data_root = 'D:\disney_test'
+        self.mapping_file = 'D:\disney_test\disney-mapping-v2.xlsx'
         # todo: enable for local debugging
-        self.data_root = os.getenv('DATA_PVC_MOUNT_PATH', '/data')
-        self.mapping_file = os.getenv('MAPPING_FILE')
+        # self.data_root = os.getenv('DATA_PVC_MOUNT_PATH', '/data')
+        # self.mapping_file = os.getenv('MAPPING_FILE')
         self.dbf_dir = os.getenv('DBF_INPUT_DIR', os.path.join(self.data_root, 'dbf-input'))
@@ -46,7 +47,7 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
         # Clean the data
         mapping_df = mapping_df.dropna(subset=['AS_SERIAL', 'ID', 'data_field_sequence_id'])
         mapping_df = mapping_df[['AS_SERIAL', 'ID', 'data_field_sequence_id',
-                                 'control_group_controller_id', 'controller_point_id']]
+                                 'control_group_controller_id', 'controller_point_id', 'control_group_name', 'control_group_id']]

         # Build the mapping dictionary
         mapping_dict = {}
@@ -58,7 +59,9 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
             mapping_dict[key].append({
                 'seq_id': int(row['data_field_sequence_id']),
                 'control_group_controller_id': int(row['control_group_controller_id']),
-                'point_id': int(row['controller_point_id'])
+                'point_id': int(row['controller_point_id']),
+                'control_group_name': row['control_group_name'],
+                'control_group_id': int(row['control_group_id'])
             })

         self.logger.info(f"Loaded {len(mapping_dict)} mapping entries")
@@ -73,10 +76,18 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
         # Connect to the database
         # todo: enable when debugging locally
-        db_config = self.config.get_database_config()
-        self.db = Database(**db_config)
+        # db_config = self.config.get_database_config()
+        # self.db = Database(**db_config)
         # todo: enable when debugging locally
+        self.csv_file_path = 'D:\disney_test\debug_controller_log.csv'
+        # Initialize the CSV file with a header row
+        if not os.path.exists(self.csv_file_path):
+            with open(self.csv_file_path, 'w') as f:
+                csv.writer(f).writerow(
+                    ['created', 'control_group_controller_id', 'point_id', 'real_value', 'control_group_name',
+                     'control_group_id'])
+
         # Process files
         total_processed = 0
         for filename in os.listdir(self.dbf_dir):
@@ -88,7 +99,7 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
         # Close the database connection
         # todo: enable when debugging locally
-        self.db.disconnect()
+        # self.db.disconnect()
         # todo: enable when debugging locally

         return total_processed
@@ -146,17 +157,24 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
                     formatted_time,
                     mapping['control_group_controller_id'],
                     mapping['point_id'],
-                    float(value)
+                    float(value),
+                    mapping['control_group_name'],
+                    mapping['control_group_id']
                 ))

                 # Batch insert
                 if len(batch_data) >= batch_size:
                     # todo: comment out for local debugging
-                    self.db.execute_batch(
-                        "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
-                        batch_data
-                    )
+                    # self.db.execute_batch(
+                    #     "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
+                    #     batch_data
+                    # )
                     # todo: comment out for local debugging
+
+                    # Write to the CSV in append mode
+                    with open(self.csv_file_path, 'a', newline='') as f:
+                        csv.writer(f).writerows(batch_data)
+
                     processed_records += len(batch_data)
                     self.logger.debug(f"Processed {processed_records} records from {os.path.basename(file_path)}")
                     batch_data = []
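
The batching logic this hunk redirects to CSV reduces to: accumulate row tuples, append them in chunks once batch_size is reached, then flush whatever is left (the final hunk below). A self-contained sketch of that flow, with illustrative names (flush_batches, rows) that are not taken from the pipeline:

    import csv

    def flush_batches(rows, csv_path, batch_size=1000):
        # Append row tuples to the CSV in chunks of batch_size, mirroring the
        # batch-insert loop above; the trailing flush handles remaining records.
        batch, written = [], 0
        for row in rows:
            batch.append(row)
            if len(batch) >= batch_size:
                with open(csv_path, 'a', newline='') as f:
                    csv.writer(f).writerows(batch)
                written += len(batch)
                batch = []
        if batch:
            with open(csv_path, 'a', newline='') as f:
                csv.writer(f).writerows(batch)
            written += len(batch)
        return written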
@@ -168,11 +186,16 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
             # Insert remaining records
             if batch_data:
                 # todo: comment out for local debugging
-                self.db.execute_batch(
-                    "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
-                    batch_data
-                )
+                # self.db.execute_batch(
+                #     "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
+                #     batch_data
+                # )
                 # todo: comment out for local debugging
+
+                # Write to the CSV in append mode
+                with open(self.csv_file_path, 'a', newline='') as f:
+                    csv.writer(f).writerows(batch_data)
+
                 processed_records += len(batch_data)
                 self.logger.debug(f"Processed {processed_records} records from {os.path.basename(file_path)}")