aboutsummaryrefslogtreecommitdiff
blob: b8258dfa04d6ce47cb2f01632aff00f28f8ffd7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
From 75920321062d682437f3fb0319dad227d8b18f6c Mon Sep 17 00:00:00 2001
From: Gabriele Musco <gabmus@disroot.org>
Date: Sat, 25 Mar 2023 14:13:44 +0100
Subject: [PATCH] add author extraction for feed item

---
 src/feed_item.cpp | 12 +++++++++---
 src/feed_item.hpp | 14 ++++++++++++++
 src/pybind.cpp    |  4 ++++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/feed_item.cpp b/src/feed_item.cpp
index a08cd71..e0dbb8c 100644
--- a/src/feed_item.cpp
+++ b/src/feed_item.cpp
@@ -1,5 +1,5 @@
-#include "feed_item.hpp"
-#include "utils.hpp"
+#include "./feed_item.hpp"
+#include "./utils.hpp"
 
 std::string FeedItem::extract_url() {
     std::string res = item_node.child("link").text().as_string();
@@ -75,6 +75,10 @@ void FeedItem::parse() {
     // pub_date
     pub_date = SynDomUtils::extract_from_node(item_node, __PUB_DATE_PARAMS);
 
+    // author
+    author_name = SynDomUtils::extract_from_node(item_node, __AUTHOR_NAME_PARAMS);
+    author_url = SynDomUtils::extract_from_node(item_node, __AUTHOR_URL_PARAMS);
+
     // img_url
     img_url = extract_img_url();
     fix_url(img_url);
@@ -87,6 +91,8 @@ std::string FeedItem::to_json() {
         "            \"url\": \"" + url + "\",\n"
         "            \"media_url\": \"" + media_url + "\",\n"
         "            \"pub_date\": \"" + pub_date + "\",\n"
-        "            \"img_url\": \"" + img_url + "\"\n"
+        "            \"img_url\": \"" + img_url + "\",\n"
+        "            \"author_name\": \"" + author_name + "\",\n"
+        "            \"author_url\": \"" + author_url + "\"\n"
         "        }";
 }
diff --git a/src/feed_item.hpp b/src/feed_item.hpp
index 5d7105e..fd1259f 100644
--- a/src/feed_item.hpp
+++ b/src/feed_item.hpp
@@ -28,6 +28,8 @@ private:
     std::string media_url;
     std::string pub_date;
     std::string img_url;
+    std::string author_name;
+    std::string author_url;
 
     /**
     * Tries to extract the item url and returns it.
@@ -73,6 +75,16 @@ private:
         {ExtractionParam::ParamType::CHILD, {"date"}},
         {ExtractionParam::ParamType::CHILD, {"dc:date"}}
     };
+    static inline const std::vector<ExtractionParam> __AUTHOR_NAME_PARAMS{
+        {ExtractionParam::ParamType::CHILD, {"author", "name"}},
+        {ExtractionParam::ParamType::CHILD, {"author"}},
+        {ExtractionParam::ParamType::CHILD, {"dc:creator"}},
+        {ExtractionParam::ParamType::CHILD, {"creator"}},
+        {ExtractionParam::ParamType::CHILD, {"itunes:author"}},
+    };
+    static inline const std::vector<ExtractionParam> __AUTHOR_URL_PARAMS{
+        {ExtractionParam::ParamType::CHILD, {"author", "uri"}}
+    };
     /**
     * Entry point of the class, parses all the relevant content. Called by
     * the constructor.
@@ -101,6 +113,8 @@ public:
     std::string get_media_url() { return media_url; }
     std::string get_pub_date() { return pub_date; }
     std::string get_img_url() { return img_url; }
+    std::string get_author_name() { return author_name; }
+    std::string get_author_url() { return author_url; }
 
     /**
     * Represents the FeedItem object (itself) as a json, returned as a string.
diff --git a/src/pybind.cpp b/src/pybind.cpp
index bef72f9..1d5a58d 100644
--- a/src/pybind.cpp
+++ b/src/pybind.cpp
@@ -19,11 +19,15 @@ PYBIND11_MODULE(syndom, m) {
         .def_property_readonly("media_url", &FeedItem::get_media_url)
         .def_property_readonly("pub_date", &FeedItem::get_pub_date)
         .def_property_readonly("img_url", &FeedItem::get_img_url)
+        .def_property_readonly("author_name", &FeedItem::get_author_name)
+        .def_property_readonly("author_url", &FeedItem::get_author_url)
         .def("get_title", &FeedItem::get_title)
         .def("get_content", &FeedItem::get_content)
         .def("get_url", &FeedItem::get_url)
         .def("get_media_url", &FeedItem::get_media_url)
         .def("get_pub_date", &FeedItem::get_pub_date)
+        .def("get_author_name", &FeedItem::get_author_name)
+        .def("get_author_url", &FeedItem::get_author_url)
         .def("get_img_url", &FeedItem::get_img_url);
     py::class_<Feed>(m, "Feed")
         .def(py::init<std::string>())
-- 
GitLab