n8n 获取新闻联播文字稿工作流

搭建好了n8n不知道干点啥,就在坛子里搜了下,发现了这个帖子:《每天新闻联播的文字稿自动整理推送的程序 应该是有吧??????》

利用这位大佬代码里提供的链接,获取到了新闻联播的数据,在n8n里鼓捣了一下,成功收到了邮件 :partying_face:


{
  "name": "新闻联播抓取",
  "nodes": [
    {
      "parameters": {},
      "id": "020a89e2-8a66-4ae7-b7ad-a8d9c1c5b5ea",
      "name": "开始",
      "type": "n8n-nodes-base.start",
      "typeVersion": 1,
      "position": [
        -784,
        224
      ]
    },
    {
      "parameters": {
        "url": "=http://tv.cctv.com/lm/xwlb/day/{{$json.date}}.shtml",
        "options": {}
      },
      "id": "3463b2f6-0255-4a8d-83c7-d5fd8a07a06a",
      "name": "1. 获取新闻列表HTML",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [
        -336,
        224
      ]
    },
    {
      "parameters": {
        "jsCode": "const allLinks = $json.url;\n// 使用 Set 高效去重\nconst uniqueLinks = [...new Set(allLinks)];\n\n// 第一个链接是摘要链接\nconst abstractLink = uniqueLinks.shift();\n\n// 剩下的是独立新闻的链接\nconst newsLinks = uniqueLinks;\n\n// 返回一个结构清晰的新项目\nreturn { \n  abstractLink: abstractLink, \n  newsLinks: newsLinks \n};"
      },
      "id": "b4cf809e-e97a-4abf-b8a0-fccbc0f7cc6f",
      "name": "分离摘要和新闻链接",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        112,
        224
      ],
      "notes": "接收所有链接的列表,去除重复项,然后将其拆分为一个摘要链接和一组新闻文章链接。"
    },
    {
      "parameters": {
        "url": "={{ $json.abstractLink }}",
        "options": {}
      },
      "id": "06eb3c16-3600-4add-9404-cee913b522f9",
      "name": "2. 获取摘要HTML",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [
        336,
        32
      ]
    },
    {
      "parameters": {
        "extractionValues": {
          "values": [
            {
              "key": "abstractRaw",
              "cssSelector": "#page_body > div.allcontent > div.video18847 > div.playingCon > div.nrjianjie_shadow > div > ul > li:nth-child(1) > p"
            }
          ]
        },
        "options": {}
      },
      "id": "149c26a6-ad32-492d-a2c9-00df87a12c1f",
      "name": "提取摘要文本",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        560,
        32
      ]
    },
    {
      "parameters": {
        "batchSize": "=1",
        "options": {
          "reset": false
        }
      },
      "id": "2cbd85c9-8ccf-468c-9f3a-691d081a4c35",
      "name": "循环处理新闻链接",
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 2,
      "position": [
        560,
        416
      ]
    },
    {
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {}
      },
      "id": "9a8f5f7e-3d89-487c-9b70-398c66c3a41d",
      "name": "3. 获取新闻文章HTML",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.1,
      "position": [
        784,
        416
      ],
      "executeOnce": true
    },
    {
      "parameters": {
        "extractionValues": {
          "values": [
            {
              "key": "title",
              "cssSelector": "#page_body > div.allcontent > div.video18847 > div.playingVideo > div.tit"
            },
            {
              "key": "content",
              "cssSelector": "#content_area"
            }
          ]
        },
        "options": {}
      },
      "id": "43dd14e5-d879-47f6-b43d-16258fc26f2e",
      "name": "提取标题和内容",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        1008,
        416
      ]
    },
    {
      "parameters": {
        "jsCode": "const item = $json;\n// 清理标题,移除 '[视频]' 并去除首尾空格\nconst cleanTitle = item.title?.replace('[视频]', '').trim();\nconst link = $('循环处理新闻链接').item.json.url;\n\nitem.title = cleanTitle;\nitem.link = link;\n\nreturn item;"
      },
      "id": "ea2b21e9-6f6d-4061-bf4a-ed6fbceab408",
      "name": "清理抓取的数据",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1232,
        488
      ]
    },
    {
      "parameters": {
        "values": {
          "string": [
            {
              "name": "abstract",
              "value": "={{ $json.abstractRaw.replaceAll(';', \";\\n\\n\").replaceAll(':', \":\\n\\n\").trim() }}"
            }
          ]
        },
        "options": {}
      },
      "id": "b8073e67-0771-4f85-9588-e9ff417ed158",
      "name": "格式化摘要",
      "type": "n8n-nodes-base.set",
      "typeVersion": 2,
      "position": [
        784,
        32
      ]
    },
    {
      "parameters": {
        "jsCode": "// --- 核心逻辑开始 ---\n\n// 目标:将分散在多个n8n item中的数据合并成一个完整的对象,\n// 然后生成Markdown,最后将所有数据(原始+新增)作为一个item返回。\n\n// 1. 数据合并与验证\n// 创建一个空对象,用于存放所有输入item的数据。\nconst consolidatedData = {};\n\n// 遍历所有的输入item (items是n8n提供的全局变量)\nfor (const item of items) {\n  // 使用Object.assign将当前item的json数据合并到consolidatedData中\n  // 这能确保无论abstract和news在哪个item里,都会被正确合并。\n  Object.assign(consolidatedData, item.json);\n}\n\n// 验证合并后的数据是否完整\nif (!consolidatedData.abstractRaw || !consolidatedData.news) {\n  // 如果缺少关键字段,抛出明确的错误,方便调试\n  throw new Error(\"输入数据不完整,必须同时包含 'abstractRaw' 和 'news' 字段。请检查上一个节点的输出。\");\n}\n\n// 至此, `consolidatedData` 已经是一个包含所有必需数据的完整对象:\n// { abstractRaw: \"...\", abstract: \"...\", news: [...] }\n\n\n// 2. Markdown内容生成\n// 从 abstractRaw 中提取标题和日期\nconst titleMatch = consolidatedData.abstractRaw.match(/《(.*?)》\\s*(\\d{8})/);\nlet mainTitle = \"新闻简报\";\nlet fileNameBase = \"news_report\";\n\nif (titleMatch && titleMatch.length >= 3) {\n  const programName = titleMatch[1];\n  const dateStr = titleMatch[2];\n  mainTitle = `${programName} ${dateStr}`;\n  fileNameBase = `${programName}_${dateStr}`;\n}\n\n// 初始化Markdown字符串\nlet markdownContent = `# ${mainTitle}\\n\\n`;\n\n// (新代码块)\n// --- 处理“本期内容提要”并转换为无序列表 ---\n// 1. 先提取纯净的摘要文本,移除头部和尾部不需要的内容\nlet abstractListText = consolidatedData.abstractRaw\n  .replace('本期节目主要内容:', '')      // 移除开头的 \"本期节目主要内容:\"\n  .replace(/\\(《.*?》.*?\\)/g, '') // 移除结尾的 \"(《新闻联播》...)\"\n  .trim();                         // 移除首尾多余的空格和换行\n// 2. 
将文本块转换为Markdown无序列表\nconst formattedList = abstractListText\n  .split('\\n')                     // 按换行符分割成一个行数组\n  .map(line => line.trim())        // 清理每一行的前后空格\n  .filter(line => line.length > 0) // 过滤掉所有空行\n  .map(line => {\n    // 使用正则表达式替换行首的数字列表标记为Markdown无序列表标记\n    // 匹配 \"1.\" \"10.\" 等\n    let newLine = line.replace(/^\\d+\\.\\s*/, '- ');\n    // 匹配 \"(1)\" \"(10)\" 等,并添加缩进\n    newLine = newLine.replace(/^[(\\(]\\d+[)\\)]\\s*/, '  - ');\n    return newLine;\n  })\n  .join('\\n'); // 将处理后的行数组重新用换行符拼接成一个字符串\n// 3. 将格式化好的列表添加到最终的Markdown内容中\nmarkdownContent += `## 本期内容提要\\n\\n`;\nmarkdownContent += `${formattedList}\\n\\n`;\n\n// 添加分割线\nmarkdownContent += `---\\n\\n`;\n\n// 循环处理每一条新闻详情\n// 根据 pairedItem.item 对 news 数组进行排序,确保新闻顺序正确\nconst sortedNews = consolidatedData.news.sort((a, b) => a.pairedItem.item - b.pairedItem.item);\n\nfor (const newsItem of sortedNews) {\n  const { title, content, link } = newsItem.json;\n  \n  // 添加新闻标题\n  markdownContent += `## ${title}\\n\\n`;\n  \n  // 添加新闻内容\n  markdownContent += `${content}\\n\\n`;\n  \n  // 添加原文链接(如果存在)\n  if (link) {\n    markdownContent += `[原文链接](${link})\\n\\n`;\n  }\n  \n  // 在每条新闻后添加分割线\n  markdownContent += `---\\n\\n`;\n}\n\n// 3. 构造最终输出\n// 创建一个新的结果对象。\nconst result = {\n  json: {\n    // 使用扩展运算符(...)保留所有原始字段\n    ...consolidatedData,\n    \n    // 添加我们新生成的字段\n    fileName: `${fileNameBase}.md`,\n    markdownContent: markdownContent,\n  }\n};\n\n// 4. 返回结果\n// n8n的Code节点要求返回一个数组,这里我们只返回处理好的这一个item。\nreturn result;\n"
      },
      "id": "e4759eba-f833-45ea-91f7-7d40c0ea9fa3",
      "name": "4. 组装Markdown",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1232,
        128
      ]
    },
    {
      "parameters": {},
      "id": "e15c555f-9ed9-4307-bf8e-5ee85c3cffb4",
      "name": "完成",
      "type": "n8n-nodes-base.noOp",
      "typeVersion": 1,
      "position": [
        1680,
        128
      ]
    },
    {
      "parameters": {
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "title",
              "cssSelector": "li a:first-child",
              "returnValue": "attribute",
              "attribute": "title",
              "returnArray": true
            },
            {
              "key": "url",
              "cssSelector": "li a:first-child",
              "returnValue": "attribute",
              "attribute": "href",
              "returnArray": true
            }
          ]
        },
        "options": {
          "trimValues": false,
          "cleanUpText": false
        }
      },
      "type": "n8n-nodes-base.html",
      "typeVersion": 1.2,
      "position": [
        -112,
        224
      ],
      "id": "e88e0875-fe7b-4cd2-b3a2-9c8578e9b1cb",
      "name": "HTML"
    },
    {
      "parameters": {
        "jsCode": "\nconst newsLinks = $json.newsLinks\n\nconst links = newsLinks.map(item =>  {return {url:item}})\n\nreturn links\n\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        336,
        416
      ],
      "id": "6fe5e395-18b4-44d5-9dc9-6ccbc33918f8",
      "name": "Code in JavaScript"
    },
    {
      "parameters": {},
      "type": "n8n-nodes-base.merge",
      "typeVersion": 3.2,
      "position": [
        1008,
        128
      ],
      "id": "a339def8-f939-4a6e-8a72-e8c4cd0d9e14",
      "name": "Merge"
    },
    {
      "parameters": {
        "jsCode": "// Loop over input items and add a new field called 'myNewField' to the JSON of each one\n// for (const item of $input.all()) {\n//   item.json.myNewField = 1;\n// }\n\nreturn { news:$input.all()};"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        784,
        224
      ],
      "id": "e8286a0d-30f8-4cd1-8777-762847a66eb5",
      "name": "Code in JavaScript1"
    },
    {
      "parameters": {
        "fromEmail": "",
        "toEmail": "",
        "subject": "={{$json.fileName}}",
        "emailFormat": "text",
        "text": "={{ $json.markdownContent }}",
        "options": {}
      },
      "type": "n8n-nodes-base.emailSend",
      "typeVersion": 2.1,
      "position": [
        1456,
        128
      ],
      "id": "0cd2830e-c8cf-4319-9aa0-2ab5fc69ff78",
      "name": "Send email",
      "webhookId": "88866a77-2dd9-43ea-aa2a-6346d6f38ebb",
      "credentials": {
        "smtp": {
          "id": "1",
          "name": "SMTP account"
        }
      }
    },
    {
      "parameters": {
        "jsCode": "// 1. 获取当前时间\nconst now = new Date();\n\n// 2. 获取上海时区的当前小时 (24小时制)\nconst shanghaiHour = parseInt(\n  now.toLocaleTimeString('sv-SE', {\n    hour: '2-digit',\n    hour12: false,\n    timeZone: 'Asia/Shanghai',\n  }),\n  10\n);\n\n// 3. 根据小时数决定使用哪个日期\nlet dateToFormat = now;\nif (shanghaiHour < 20) {\n  // 小于20点,日期减一天\n  // 创建一个新日期对象,避免修改原始的 `now` 对象\n  dateToFormat = new Date(now.getTime() - 24 * 60 * 60 * 1000);\n}\n\n// 4. 格式化最终的日期\nconst date = dateToFormat\n  .toLocaleDateString('zh-CN', {\n    year: 'numeric',\n    month: '2-digit',\n    day: '2-digit',\n    timeZone: 'Asia/Shanghai',\n  })\n  .replace(/\\//g, '');\n\nreturn {date}\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -560,
        224
      ],
      "id": "e0decf06-750d-4ccf-8515-fb68967ec67b",
      "name": "获取日期"
    }
  ],
  "pinData": {},
  "connections": {
    "开始": {
      "main": [
        [
          {
            "node": "获取日期",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1. 获取新闻列表HTML": {
      "main": [
        [
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "分离摘要和新闻链接": {
      "main": [
        [
          {
            "node": "2. 获取摘要HTML",
            "type": "main",
            "index": 0
          },
          {
            "node": "Code in JavaScript",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "2. 获取摘要HTML": {
      "main": [
        [
          {
            "node": "提取摘要文本",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "提取摘要文本": {
      "main": [
        [
          {
            "node": "格式化摘要",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "循环处理新闻链接": {
      "main": [
        [
          {
            "node": "3. 获取新闻文章HTML",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Code in JavaScript1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "3. 获取新闻文章HTML": {
      "main": [
        [
          {
            "node": "提取标题和内容",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "提取标题和内容": {
      "main": [
        [
          {
            "node": "清理抓取的数据",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "清理抓取的数据": {
      "main": [
        [
          {
            "node": "循环处理新闻链接",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "格式化摘要": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4. 组装Markdown": {
      "main": [
        [
          {
            "node": "Send email",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML": {
      "main": [
        [
          {
            "node": "分离摘要和新闻链接",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code in JavaScript": {
      "main": [
        [
          {
            "node": "循环处理新闻链接",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "4. 组装Markdown",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code in JavaScript1": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Send email": {
      "main": [
        [
          {
            "node": "完成",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "获取日期": {
      "main": [
        [
          {
            "node": "1. 获取新闻列表HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "4ac25040-5885-47f0-842b-467d3aa86178",
  "meta": {
    "templateCredsSetupCompleted": true,
    "instanceId": "a92c2d89e657584e4fcb95858841e445654f73c2cecc87656a167b5cf09f5b9e"
  },
  "id": "FlqydI2IFBwHG8XK",
  "tags": []
}
24 个赞

大佬厉害

1 个赞

看着有意思

1 个赞

我意识到我可以直接从上面仓库的存档里获取 :tieba_002:

3 个赞

你发现了更优化的工作流:tieba_013:

佬,这个json没办法直接导入呀。

用AI修好了:

2 个赞

我也尝试了,结果发现 n8n 访问不了 GitHub

怎么会不能

HTTP Request随便访问

或者你直接用Github节点 但是需要凭证

1 个赞

修好了,json添加了多余的符号,现在应该可以直接导入了

1 个赞

用的nas,nas部署docker时候没有指定翻墙。

话说n8n是支持worker对吧?那是否可以指定worker的部分出站走socks代理?


不会玩

n8n一直是个谜,得花时间研究下

Send email我没有放key呀, 这个需要你自己的key

添加了个markdown转html的节点

明天试一下