n8n 获取新闻联播文字稿工作流

shayne · 2025 年10 月 17 日 07:41

搭建好了n8n不知道干点啥,就在坛子里搜了下,发现了这个帖子:《每天新闻联播的文字稿自动整理推送的程序应该是有吧？？？？？？》

利用这位大佬代码里提供的链接,获取到了新闻联播的数据,在n8n里鼓捣了一下,成功收到了邮件


{
  "name": "新闻联播抓取",
  "nodes": [
    {
      "parameters": {},
      "id": "020a89e2-8a66-4ae7-b7ad-a8d9c1c5b5ea",
      "name": "开始",
      "type": "n8n-nodes-base.start",
      "typeVersion": 1,
      "position": [
        -784,
        224
      ]
    },
    {
      "parameters": {
        "url": "=http://tv.cctv.com/lm/xwlb/day/{{$json.date}}.shtml",
        "options": {}
      },
      "id": "3463b2f6-0255-4a8d-83c7-d5fd8a07a06a",
      "name": "1. 获取新闻列表HTML",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [
        -336,
        224
      ]
    },
    {
      "parameters": {
        "jsCode": "const allLinks = $json.url;\n// 使用 Set 高效去重\nconst uniqueLinks = [...new Set(allLinks)];\n\n// 第一个链接是摘要链接\nconst abstractLink = uniqueLinks.shift();\n\n// 剩下的是独立新闻的链接\nconst newsLinks = uniqueLinks;\n\n// 返回一个结构清晰的新项目\nreturn { \n  abstractLink: abstractLink, \n  newsLinks: newsLinks \n};"
      },
      "id": "b4cf809e-e97a-4abf-b8a0-fccbc0f7cc6f",
      "name": "分离摘要和新闻链接",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        112,
        224
      ],
      "notes": "接收所有链接的列表，去除重复项，然后将其拆分为一个摘要链接和一组新闻文章链接。"
    },
    {
      "parameters": {
        "url": "={{ $json.abstractLink }}",
        "options": {}
      },
      "id": "06eb3c16-3600-4add-9404-cee913b522f9",
      "name": "2. 获取摘要HTML",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [
        336,
        32
      ]
    },
    {
      "parameters": {
        "extractionValues": {
          "values": [
            {
              "key": "abstractRaw",
              "cssSelector": "#page_body > div.allcontent > div.video18847 > div.playingCon > div.nrjianjie_shadow > div > ul > li:nth-child(1) > p"
            }
          ]
        },
        "options": {}
      },
      "id": "149c26a6-ad32-492d-a2c9-00df87a12c1f",
      "name": "提取摘要文本",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        560,
        32
      ]
    },
    {
      "parameters": {
        "batchSize": "=1",
        "options": {
          "reset": false
        }
      },
      "id": "2cbd85c9-8ccf-468c-9f3a-691d081a4c35",
      "name": "循环处理新闻链接",
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 2,
      "position": [
        560,
        416
      ]
    },
    {
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {}
      },
      "id": "9a8f5f7e-3d89-487c-9b70-398c66c3a41d",
      "name": "3. 获取新闻文章HTML",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [
        784,
        416
      ],
      "executeOnce": true
    },
    {
      "parameters": {
        "extractionValues": {
          "values": [
            {
              "key": "title",
              "cssSelector": "#page_body > div.allcontent > div.video18847 > div.playingVideo > div.tit"
            },
            {
              "key": "content",
              "cssSelector": "#content_area"
            }
          ]
        },
        "options": {}
      },
      "id": "43dd14e5-d879-47f6-b43d-16258fc26f2e",
      "name": "提取标题和内容",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        1008,
        416
      ]
    },
    {
      "parameters": {
        "jsCode": "const item = $json;\n// 清理标题，移除 '[视频]' 并去除首尾空格\nconst cleanTitle = item.title?.replace('[视频]', '').trim();\nconst link = $('循环处理新闻链接').item.json.url;\n\nitem.title = cleanTitle;\nitem.link = link;\n\nreturn item;"
      },
      "id": "ea2b21e9-6f6d-4061-bf4a-ed6fbceab408",
      "name": "清理抓取的数据",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1232,
        488
      ]
    },
    {
      "parameters": {
        "values": {
          "string": [
            {
              "name": "abstract",
              "value": "={{ $json.abstractRaw.replaceAll('；', \"；\\n\\n\").replaceAll('：', \"：\\n\\n\").trim() }}"
            }
          ]
        },
        "options": {}
      },
      "id": "b8073e67-0771-4f85-9588-e9ff417ed158",
      "name": "格式化摘要",
      "type": "n8n-nodes-base.set",
      "typeVersion": 2,
      "position": [
        784,
        32
      ]
    },
    {
      "parameters": {
        "jsCode": "// --- 核心逻辑开始 ---\n\n// 目标：将分散在多个n8n item中的数据合并成一个完整的对象，\n// 然后生成Markdown，最后将所有数据（原始+新增）作为一个item返回。\n\n// 1. 数据合并与验证\n// 创建一个空对象，用于存放所有输入item的数据。\nconst consolidatedData = {};\n\n// 遍历所有的输入item (items是n8n提供的全局变量)\nfor (const item of items) {\n  // 使用Object.assign将当前item的json数据合并到consolidatedData中\n  // 这能确保无论abstract和news在哪个item里，都会被正确合并。\n  Object.assign(consolidatedData, item.json);\n}\n\n// 验证合并后的数据是否完整\nif (!consolidatedData.abstractRaw || !consolidatedData.news) {\n  // 如果缺少关键字段，抛出明确的错误，方便调试\n  throw new Error(\"输入数据不完整，必须同时包含 'abstractRaw' 和 'news' 字段。请检查上一个节点的输出。\");\n}\n\n// 至此, `consolidatedData` 已经是一个包含所有必需数据的完整对象:\n// { abstractRaw: \"...\", abstract: \"...\", news: [...] }\n\n\n// 2. Markdown内容生成\n// 从 abstractRaw 中提取标题和日期\nconst titleMatch = consolidatedData.abstractRaw.match(/《(.*?)》\\s*(\\d{8})/);\nlet mainTitle = \"新闻简报\";\nlet fileNameBase = \"news_report\";\n\nif (titleMatch && titleMatch.length >= 3) {\n  const programName = titleMatch[1];\n  const dateStr = titleMatch[2];\n  mainTitle = `${programName} ${dateStr}`;\n  fileNameBase = `${programName}_${dateStr}`;\n}\n\n// 初始化Markdown字符串\nlet markdownContent = `# ${mainTitle}\\n\\n`;\n\n// (新代码块)\n// --- 处理“本期内容提要”并转换为无序列表 ---\n// 1. 先提取纯净的摘要文本，移除头部和尾部不需要的内容\nlet abstractListText = consolidatedData.abstractRaw\n  .replace('本期节目主要内容：', '')      // 移除开头的 \"本期节目主要内容：\"\n  .replace(/\\（《.*?》.*?\\）/g, '') // 移除结尾的 \"（《新闻联播》...）\"\n  .trim();                         // 移除首尾多余的空格和换行\n// 2. 将文本块转换为Markdown无序列表\nconst formattedList = abstractListText\n  .split('\\n')                     // 按换行符分割成一个行数组\n  .map(line => line.trim())        // 清理每一行的前后空格\n  .filter(line => line.length > 0) // 过滤掉所有空行\n  .map(line => {\n    // 使用正则表达式替换行首的数字列表标记为Markdown无序列表标记\n    // 匹配 \"1.\" \"10.\" 等\n    let newLine = line.replace(/^\\d+\\.\\s*/, '- ');\n    // 匹配 \"（1）\" \"（10）\" 等，并添加缩进\n    newLine = newLine.replace(/^[（\\(]\\d+[）\\)]\\s*/, '  - ');\n    return newLine;\n  })\n  .join('\\n'); // 将处理后的行数组重新用换行符拼接成一个字符串\n// 3. 将格式化好的列表添加到最终的Markdown内容中\nmarkdownContent += `## 本期内容提要\\n\\n`;\nmarkdownContent += `${formattedList}\\n\\n`;\n\n// 添加分割线\nmarkdownContent += `---\\n\\n`;\n\n// 循环处理每一条新闻详情\n// 根据 pairedItem.item 对 news 数组进行排序，确保新闻顺序正确\nconst sortedNews = consolidatedData.news.sort((a, b) => a.pairedItem.item - b.pairedItem.item);\n\nfor (const newsItem of sortedNews) {\n  const { title, content, link } = newsItem.json;\n  \n  // 添加新闻标题\n  markdownContent += `## ${title}\\n\\n`;\n  \n  // 添加新闻内容\n  markdownContent += `${content}\\n\\n`;\n  \n  // 添加原文链接（如果存在）\n  if (link) {\n    markdownContent += `[原文链接](${link})\\n\\n`;\n  }\n  \n  // 在每条新闻后添加分割线\n  markdownContent += `---\\n\\n`;\n}\n\n// 3. 构造最终输出\n// 创建一个新的结果对象。\nconst result = {\n  json: {\n    // 使用扩展运算符(...)保留所有原始字段\n    ...consolidatedData,\n    \n    // 添加我们新生成的字段\n    fileName: `${fileNameBase}.md`,\n    markdownContent: markdownContent,\n  }\n};\n\n// 4. 返回结果\n// n8n的Code节点要求返回一个数组，这里我们只返回处理好的这一个item。\nreturn result;\n"
      },
      "id": "e4759eba-f833-45ea-91f7-7d40c0ea9fa3",
      "name": "4. 组装Markdown",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1232,
        128
      ]
    },
    {
      "parameters": {},
      "id": "e15c555f-9ed9-4307-bf8e-5ee85c3cffb4",
      "name": "完成",
      "type": "n8n-nodes-base.noOp",
      "typeVersion": 1,
      "position": [
        1680,
        128
      ]
    },
    {
      "parameters": {
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "title",
              "cssSelector": "li a:first-child",
              "returnValue": "attribute",
              "attribute": "title",
              "returnArray": true
            },
            {
              "key": "url",
              "cssSelector": "li a:first-child",
              "returnValue": "attribute",
              "attribute": "href",
              "returnArray": true
            }
          ]
        },
        "options": {
          "trimValues": false,
          "cleanUpText": false
        }
      },
      "type": "n8n-nodes-base.html",
      "typeVersion": 1.2,
      "position": [
        -112,
        224
      ],
      "id": "e88e0875-fe7b-4cd2-b3a2-9c8578e9b1cb",
      "name": "HTML"
    },
    {
      "parameters": {
        "jsCode": "\nconst newsLinks = $json.newsLinks\n\nconst links = newsLinks.map(item =>  {return {url:item}})\n\nreturn links\n\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        336,
        416
      ],
      "id": "6fe5e395-18b4-44d5-9dc9-6ccbc33918f8",
      "name": "Code in JavaScript"
    },
    {
      "parameters": {},
      "type": "n8n-nodes-base.merge",
      "typeVersion": 3.2,
      "position": [
        1008,
        128
      ],
      "id": "a339def8-f939-4a6e-8a72-e8c4cd0d9e14",
      "name": "Merge"
    },
    {
      "parameters": {
        "jsCode": "// Loop over input items and add a new field called 'myNewField' to the JSON of each one\n// for (const item of $input.all()) {\n//   item.json.myNewField = 1;\n// }\n\nreturn { news:$input.all()};"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        784,
        224
      ],
      "id": "e8286a0d-30f8-4cd1-8777-762847a66eb5",
      "name": "Code in JavaScript1"
    },
    {
      "parameters": {
        "fromEmail": "",
        "toEmail": "",
        "subject": "={{$json.fileName}}",
        "emailFormat": "text",
        "text": "={{ $json.markdownContent }}",
        "options": {}
      },
      "type": "n8n-nodes-base.emailSend",
      "typeVersion": 2.1,
      "position": [
        1456,
        128
      ],
      "id": "0cd2830e-c8cf-4319-9aa0-2ab5fc69ff78",
      "name": "Send email",
      "webhookId": "88866a77-2dd9-43ea-aa2a-6346d6f38ebb",
      "credentials": {
        "smtp": {
          "id": "1",
          "name": "SMTP account"
        }
      }
    },
    {
      "parameters": {
        "jsCode": "// 1. 获取当前时间\nconst now = new Date();\n\n// 2. 获取上海时区的当前小时 (24小时制)\nconst shanghaiHour = parseInt(\n  now.toLocaleTimeString('sv-SE', {\n    hour: '2-digit',\n    hour12: false,\n    timeZone: 'Asia/Shanghai',\n  }),\n  10\n);\n\n// 3. 根据小时数决定使用哪个日期\nlet dateToFormat = now;\nif (shanghaiHour < 20) {\n  // 小于20点，日期减一天\n  // 创建一个新日期对象，避免修改原始的 `now` 对象\n  dateToFormat = new Date(now.getTime() - 24 * 60 * 60 * 1000);\n}\n\n// 4. 格式化最终的日期\nconst date = dateToFormat\n  .toLocaleDateString('zh-CN', {\n    year: 'numeric',\n    month: '2-digit',\n    day: '2-digit',\n    timeZone: 'Asia/Shanghai',\n  })\n  .replace(/\\//g, '');\n\nreturn {date}\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -560,
        224
      ],
      "id": "e0decf06-750d-4ccf-8515-fb68967ec67b",
      "name": "获取日期"
    }
  ],
  "pinData": {},
  "connections": {
    "开始": {
      "main": [
        [
          {
            "node": "获取日期",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1. 获取新闻列表HTML": {
      "main": [
        [
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "分离摘要和新闻链接": {
      "main": [
        [
          {
            "node": "2. 获取摘要HTML",
            "type": "main",
            "index": 0
          },
          {
            "node": "Code in JavaScript",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "2. 获取摘要HTML": {
      "main": [
        [
          {
            "node": "提取摘要文本",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "提取摘要文本": {
      "main": [
        [
          {
            "node": "格式化摘要",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "循环处理新闻链接": {
      "main": [
        [
          {
            "node": "3. 获取新闻文章HTML",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Code in JavaScript1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "3. 获取新闻文章HTML": {
      "main": [
        [
          {
            "node": "提取标题和内容",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "提取标题和内容": {
      "main": [
        [
          {
            "node": "清理抓取的数据",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "清理抓取的数据": {
      "main": [
        [
          {
            "node": "循环处理新闻链接",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "格式化摘要": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4. 组装Markdown": {
      "main": [
        [
          {
            "node": "Send email",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML": {
      "main": [
        [
          {
            "node": "分离摘要和新闻链接",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code in JavaScript": {
      "main": [
        [
          {
            "node": "循环处理新闻链接",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "4. 组装Markdown",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code in JavaScript1": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Send email": {
      "main": [
        [
          {
            "node": "完成",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "获取日期": {
      "main": [
        [
          {
            "node": "1. 获取新闻列表HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "4ac25040-5885-47f0-842b-467d3aa86178",
  "meta": {
    "templateCredsSetupCompleted": true,
    "instanceId": "a92c2d89e657584e4fcb95858841e445654f73c2cecc87656a167b5cf09f5b9e"
  },
  "id": "FlqydI2IFBwHG8XK",
  "tags": []
}

wtffqbpl · 2025 年10 月 17 日 07:41

大佬厉害

yy1111 · 2025 年10 月 17 日 07:47

看着有意思

loyicnaup · 2025 年10 月 17 日 07:48

我意识到我可以直接从上面仓库的存档里获取

shayne · 2025 年10 月 17 日 07:50

你发现了更优化的工作流

antior · 2025 年10 月 21 日 10:57

佬，这个json没办法直接导入呀。

用AI修好了：

antior · 2025 年10 月 21 日 11:04

我也尝试了，结果发现，n8n github访问不能

loyicnaup · 2025 年10 月 21 日 11:39

怎么会不能

HTTP Request随便访问

或者你直接用Github节点但是需要凭证

shayne · 2025 年10 月 21 日 12:21

修好了,json添加了多余的符号,现在应该可以直接导入了

antior · 2025 年10 月 21 日 15:47

用的nas，nas部署docker时候没有指定翻墙。

话说n8n是支持worker对吧？那是否可以指定worker的部分出站走socks代理？

duolabmeng6 · 2025 年11 月 3 日 02:59

不会玩

JqbR001 · 2025 年11 月 3 日 03:09

n8n一直是个迷，得花时间研究下

shayne · 2025 年11 月 3 日 05:40

Send email我没有放key呀, 这个需要你自己的key

c0nch · 2025 年11 月 13 日 18:23

添加了个markdown转html的节点

miujy · 2025 年11 月 13 日 18:25

明天试一下

话题		回复	浏览量
【n8n系列教程6】十分钟完成：每日AI总结L站热门贴，并自动推送到你的邮箱资源荟萃	41	3239	2025 年6 月 3 日
n8n+rsshub=监控“anything” 开发调优人工智能 , 软件开发	54	2839	2025 年8 月 3 日
大佬们有啥NAS上好玩儿的 Docker镜像推荐？搞七捻三 Docker , 人工智能 , nas , 快问快答 , 纯水	75	4005	2025 年6 月 10 日
玩玩n8n，起点更新推送、Claude、OpenAI、Gemini、Cursor状态推送搞七捻三纯水	15	1019	2025 年5 月 27 日
【n8n+warp+puppeteer】二十分钟完成：每日AI总结L站热门贴，并自动推送到电报频道资源荟萃 VPS , 人工智能	23	1580	2025 年9 月 24 日

n8n 获取新闻联播文字稿工作流

相关话题