PHP读取word文档

发布时间 2023-12-21 21:10:09作者: 79524795

1.安装

1.1 使用 Composer 安装 phpoffice/phpword

composer require phpoffice/phpword

1.2 后台代码

  	 // Path of the uploaded file, relative to the public directory, as sent by the client.
            $file = $this->request->request('file');
            if (!$file || !is_string($file)) {
                $this->error(__('Parameter %s can not be empty', 'file'));
            }

            // Resolve both paths so "..", "." and duplicate separators are normalised
            // before checking that the requested file really lives inside public/.
            // The value comes from the request, so without this check "../" segments
            // could point the reader at any file on the server.
            // NOTE(review): realpath() also resolves symlinks, so files reached through
            // a symlink that leaves public/ are rejected - confirm this suits the deployment.
            $publicDir = realpath(ROOT_PATH . DS . 'public');
            $filePath = realpath(ROOT_PATH . DS . 'public' . DS . $file);
            if (
                $publicDir === false
                || $filePath === false
                || strncmp($filePath, $publicDir . DIRECTORY_SEPARATOR, strlen($publicDir) + 1) !== 0
                || !is_file($filePath)
            ) {
                $this->error(__('No results were found'));
            }

            // Validate the extension case-insensitively ("REPORT.DOCX" is as valid as
            // "report.docx") and with a strict comparison.
            $ext = strtolower(pathinfo($filePath, PATHINFO_EXTENSION));
            if (!in_array($ext, ['csv', 'xls', 'xlsx', 'docx'], true)) {
                $this->error(__('Unknown data format'));
            }

            // Parse the document; realpath() has already collapsed any doubled
            // separators, so no further path clean-up is needed here.
            $list = $this->tianhao($filePath);
			  
			  
			  

1.3 把文档内容解析成数组

         
        /**
         * Load a Word document and flatten its content into an array.
         *
         * Sections and their elements are visited in document order:
         *  - each TextRun yields one string: its Text children concatenated, with
         *    every embedded Image written to disk and replaced by that file's URL;
         *  - each Table yields one array holding getTableNode() for every row;
         *  - any other element type is skipped.
         *
         * If PhpWord throws one of its own exceptions the script is terminated
         * with an error message, so the caller never receives that exception.
         *
         * @param string $cleanedFilePath Path of the document to load.
         * @return array Mixed list of strings (text runs) and arrays (tables).
         */
        public function tianhao($cleanedFilePath)
        {
            try {
                // Fully qualified like the element classes below, so this method does
                // not depend on a "use" statement elsewhere in the file.
                $phpWord = \PhpOffice\PhpWord\IOFactory::load($cleanedFilePath);

                $documentContent = [];

                foreach ($phpWord->getSections() as $section) {
                    foreach ($section->getElements() as $element) {
                        if ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
                            $text = '';
                            foreach ($element->getElements() as $child) {
                                if ($child instanceof \PhpOffice\PhpWord\Element\Text) {
                                    $text .= $child->getText();
                                } elseif ($child instanceof \PhpOffice\PhpWord\Element\Image) {
                                    // Raw image bytes; written as-is (no base64 round trip).
                                    $imageData = (string) $child->getImageString();

                                    // NOTE(review): the extension is always ".png" whatever the
                                    // source image format is, and the directory / host below are
                                    // specific to one deployment.
                                    $filename = uniqid() . '.png';
                                    $uploadPath = '/www/wwwroot/tk.79524795.vip/public/wordimg/' . $filename;

                                    // Only reference the image when it was actually stored, so
                                    // the result never contains a URL to a file that does not exist.
                                    if (file_put_contents($uploadPath, $imageData) === false) {
                                        continue;
                                    }

                                    $text .= 'http://tk.79524795.vip/wordimg/' . $filename;
                                }
                            }
                            $documentContent[] = $text;
                        } elseif ($element instanceof \PhpOffice\PhpWord\Element\Table) {
                            // Start from an empty list for every table; otherwise rows of
                            // earlier tables would leak into each later table's entry.
                            $rows = [];
                            foreach ($element->getRows() as $row) {
                                $rows[] = $this->getTableNode($row);
                            }
                            $documentContent[] = $rows;
                        }
                    }
                }

                return $documentContent;
            } catch (\PhpOffice\PhpWord\Exception\Exception $e) {
                // Kept from the original contract: abort the request with the message
                // instead of propagating the exception to the caller.
                die('Error loading DOCX file: ' . $e->getMessage());
            }
        }