fetch.ts 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import fs from "fs";
  2. import path from 'path';
  3. import axios from "axios";
  4. import * as cheerio from "cheerio";
  5. import { ArgumentParser } from "argparse";
  /**
   * Collects the URLs of the holiday notices published for `year`.
   *
   * Walks the paginated gov.cn policy search endpoint and keeps every hit
   * whose title contains the year. Hits are prepended as they are seen, so the
   * returned list is in the reverse of the order the endpoint served them.
   *
   * @param year - Year to search for; it must appear in a hit's title.
   * @returns Matching notice URLs, or an empty array when nothing matches.
   * @throws Error when the HTTP status is not 200 or the payload reports a
   *   code other than 200 / 1001.
   */
  const getPaperUrls = async (year: number): Promise<string[]> => {
    // Only the fields this function reads from the JSON payload.
    type SearchItem = { title?: unknown; url: string };
    type SearchResponse = {
      code: number;
      msg?: string;
      searchVO?: { totalpage?: number; listVO?: SearchItem[] };
    };

    const yearText = year.toString();
    const ret: string[] = [];
    let pageIndex = 0;
    let hasNextPage = true;

    while (hasNextPage) {
      const response = await axios.get(
        "https://sousuo.www.gov.cn/search-gov/data",
        {
          params: {
            // All issuing bodies combined: `?t=zhengcelibrary_gw_bm_gb`
            // State Council documents: `?t=zhengcelibrary_gw` (gw = 国务, State Council)
            // State Council department documents: `?t=zhengcelibrary_bm` (bm = 部门, department)
            // State Council gazette documents: `?t=zhengcelibrary_gb` (gb = 公报, gazette)
            t: "zhengcelibrary_gw",
            p: pageIndex,
            n: 5,
            q: `假期 ${year}`,
            pcodeJiguan: "国办发明电",
            puborg: "国务院办公厅",
            filetype: "通知",
            sort: "pubtime",
          },
        }
      );
      if (response.status !== 200) {
        throw new Error(`Request failed with status code ${response.status}`);
      }

      const data: SearchResponse = response.data;
      if (data.code === 1001) {
        // Treated as "no match": stop paging but keep whatever earlier pages
        // already contributed instead of discarding it.
        break;
      }
      if (data.code !== 200) {
        throw new Error(`Error: ${data.code}: ${data.msg}`);
      }

      // A successful payload without a result list is treated as an empty page.
      for (const item of data.searchVO?.listVO ?? []) {
        if (typeof item.title === "string" && item.title.includes(yearText)) {
          ret.unshift(item.url);
        }
      }

      pageIndex += 1;
      hasNextPage = pageIndex < (data.searchVO?.totalpage ?? 0);
    }

    return ret;
  };
  /**
   * Downloads one notice page and returns its text, one paragraph per line.
   *
   * The markup of the `#UCAP-CONTENT` element is re-parsed after turning every
   * line break into a paragraph boundary, so text separated by `<br>` ends up
   * on separate lines of the result.
   *
   * @param url - Address of the notice page.
   * @returns Trimmed text of every `<p>` inside the content container, joined
   *   with `\n`.
   * @throws Error when the HTTP status is not 200, when the container element
   *   is missing, or when no text could be extracted.
   */
  const getPaper = async (url: string): Promise<string> => {
    const response = await axios.get(url);
    if (response.status !== 200) {
      throw new Error(`Request failed with status code ${response.status}`);
    }

    const $ = cheerio.load(response.data);
    const container = $("#UCAP-CONTENT");
    if (!container.length) {
      throw new Error(`Cannot get paper container from url: ${url}`);
    }

    // cheerio's HTML serializer writes line breaks as `<br>` (no slash), so
    // accept `<br>`, `<br/>` and `<br />` in any letter case.
    const html = (container.html() ?? "").replace(/<br\s*\/?>/gi, "</p><p>");

    // Query and wrap nodes with the instance that owns the re-parsed document.
    const $paragraphs = cheerio.load(html);
    const ret = $paragraphs("p")
      .map((_, el) => $paragraphs(el).text().trim())
      .get()
      .join("\n");
    if (!ret) {
      throw new Error(`Cannot get paper content from url: ${url}`);
    }
    return ret;
  };
  /**
   * Fetches the text of every holiday notice found for `year`.
   *
   * @param year - Year whose notices are looked up via `getPaperUrls`.
   * @returns The notice texts joined with `\n`, in the order `getPaperUrls`
   *   returned their URLs; an empty string when no URL was found.
   */
  const fetchHoliday = async (year: number): Promise<string> => {
    const paperUrls = await getPaperUrls(year);
    // The downloads are independent of each other, so run them concurrently;
    // Promise.all keeps the results in the same order as `paperUrls`.
    const papers = await Promise.all(paperUrls.map((url) => getPaper(url)));
    return papers.join("\n");
  };
  /**
   * Command-line entry point.
   *
   * Reads the positional `year` argument, fetches the holiday notices for it
   * and, when any text was found, prints the text, writes it as HTML
   * paragraphs to `holidays.html` in the current working directory and, if the
   * `GITHUB_OUTPUT` environment variable is set, appends a `holidays=<path>`
   * line to the file it names.
   */
  const main = async (): Promise<void> => {
    // The notice text comes from a scraped web page, so neutralise the
    // characters that would otherwise be parsed as markup.
    const escapeHtml = (text: string): string =>
      text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

    const parser = new ArgumentParser();
    parser.addArgument("year", { type: "int" });
    const args = parser.parseArgs();
    const year: number = args.year;

    console.log(`Fetching holiday for ${year}...`);
    const text = await fetchHoliday(year);
    if (!text) {
      console.log('No holidays found.');
      return;
    }
    console.log(text);

    // One <p> element per line of notice text, with no separator between them.
    const html = text
      .split('\n')
      .map((line) => `<p>${escapeHtml(line)}</p>`)
      .join('');
    const holidaysFile = path.join(process.cwd(), 'holidays.html');
    fs.writeFileSync(holidaysFile, html);

    const outputPath = process.env.GITHUB_OUTPUT;
    if (outputPath) {
      fs.appendFileSync(outputPath, `holidays=${holidaysFile}\n`);
    }
  };
  // Script entry point: run `main`; if it rejects, print the rejection reason
  // to stderr and terminate the process with exit status 1.
  main().then(undefined, (reason) => {
    console.error(reason);
    process.exit(1);
  });