// fetch.ts
  1. import axios from "axios";
  2. import cheerio from "cheerio";
  3. import { ArgumentParser } from "argparse";
  4. const getPaperUrls = async (year: number): Promise<string[]> => {
  5. let hasNextPage = true;
  6. let pageIndex = 0;
  7. const ret: string[] = [];
  8. while (hasNextPage) {
  9. const response = await axios.get(
  10. "https://sousuo.www.gov.cn/search-gov/data",
  11. {
  12. params: {
  13. // 不区分发布机构 `?t=zhengcelibrary_gw_bm_gb`
  14. // 分发布机构国务 `?t=zhengcelibrary_gw` 即 gw - 国务
  15. // 国务院部门文件 `?t=zhengcelibrary_bm` 即 bm - 部门
  16. // 国务院公报文件 `?t=zhengcelibrary_gb` 即 gb - 公报
  17. t: "zhengcelibrary_gw",
  18. p: pageIndex,
  19. n: 5,
  20. q: `假期 ${year}`,
  21. pcodeJiguan: "国办发明电",
  22. puborg: "国务院办公厅",
  23. filetype: "通知",
  24. sort: "pubtime",
  25. },
  26. }
  27. );
  28. if (response.status !== 200) {
  29. throw new Error(`Request failed with status code ${response.status}`);
  30. }
  31. const data = response.data;
  32. if (data.code === 1001) {
  33. return [];
  34. }
  35. if (data.code !== 200) {
  36. throw new Error(`Error: ${data.code}: ${data.msg}`);
  37. }
  38. for (const item of data.searchVO.listVO) {
  39. if (item.title.includes(year.toString())) {
  40. ret.unshift(item.url);
  41. }
  42. }
  43. pageIndex += 1;
  44. hasNextPage = pageIndex < data.searchVO.totalpage;
  45. }
  46. return ret;
  47. };
  48. const getPaper = async (url: string): Promise<string> => {
  49. const response = await axios.get(url);
  50. if (response.status !== 200) {
  51. throw new Error(`Request failed with status code ${response.status}`);
  52. }
  53. const $ = cheerio.load(response.data);
  54. const container = $("#UCAP-CONTENT");
  55. if (!container.length) {
  56. throw new Error(`Cannot get paper container from url: ${url}`);
  57. }
  58. const paragraphs = container.html()?.replace(/<br\/>/g, "</p><p>");
  59. const p = cheerio.load(paragraphs || "")("p");
  60. const ret = p
  61. .map((_, el) => $(el).text().trim())
  62. .get()
  63. .join("\n");
  64. if (!ret) {
  65. throw new Error(`Cannot get paper content from url: ${url}`);
  66. }
  67. return ret;
  68. };
  69. const fetchHoliday = async (year: number): Promise<string> => {
  70. const paperUrls = await getPaperUrls(year);
  71. const papers: string[] = [];
  72. for (const url of paperUrls) {
  73. const paper = await getPaper(url);
  74. papers.push(paper);
  75. }
  76. return papers.join("\n");
  77. };
  78. const main = async () => {
  79. const parser = new ArgumentParser();
  80. parser.addArgument("year", { type: "int" });
  81. const args = parser.parseArgs();
  82. const year = args.year;
  83. console.log(`Fetching holiday for ${year}...`);
  84. const result = await fetchHoliday(year);
  85. console.log(result, result.length);
  86. };
  87. main().catch((error) => {
  88. console.error(error);
  89. process.exit(1);
  90. });