add pandoc as post processor
This commit is contained in:
parent
6c622bce1f
commit
6ccec68923
3 changed files with 10 additions and 7 deletions
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
__spiderss__ is a plaintext RSS crawler, based on [feedparser](https://github.com/kurtmckee/feedparser), [python-readability](https://github.com/buriy/python-readability) and [html2text](https://github.com/Alir3z4/html2text).
|
__spiderss__ is a plaintext RSS crawler, based on [feedparser](https://github.com/kurtmckee/feedparser), [python-readability](https://github.com/buriy/python-readability), [html2text](https://github.com/Alir3z4/html2text) and [Pandoc](https://pandoc.org/).
|
||||||
Actually, it's just a python script.
|
Actually, it's just a Python script.
|
||||||
|
|
||||||
Read the news you want, the way you want it.
|
Read the news you want, the way you want it.
|
||||||
|
|
@ -33,7 +33,7 @@ Call `nix-shell` in the project directory. This will drop you into a python envi
|
||||||
|
|
||||||
### Legacy OS
|
### Legacy OS
|
||||||
|
|
||||||
Install the requirements with `pip install -r requirements.txt`.
|
Install **Pandoc** with your system's package manager, then install the Python requirements with `pip install -r requirements.txt`.
|
||||||
|
|
||||||
### Android
|
### Android
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ stdenv.mkDerivation {
|
||||||
buildInputs = with pkgs; [
|
buildInputs = with pkgs; [
|
||||||
python37Full
|
python37Full
|
||||||
python37Packages.virtualenv
|
python37Packages.virtualenv
|
||||||
|
pandoc
|
||||||
];
|
];
|
||||||
src = null;
|
src = null;
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
|
|
|
||||||
12
spiderss.py
12
spiderss.py
|
|
@ -6,6 +6,7 @@ import html2text
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import toml
|
import toml
|
||||||
|
|
@ -79,10 +80,11 @@ def get_articles(feed_url):
|
||||||
|
|
||||||
|
|
||||||
# Write text to file
|
# Write text to file
|
||||||
def write_to_file(filename, text):
|
def write_to_file(filepath, text):
|
||||||
file = open(filename, 'w')
|
|
||||||
file.write(text)
|
# Postprocess article with pandoc and write to file
|
||||||
file.close()
|
pandoc = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'markdown', '-o', filepath], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||||
|
pandoc.communicate(input = text.encode())
|
||||||
|
|
||||||
|
|
||||||
# Get filename from a date and a title
|
# Get filename from a date and a title
|
||||||
|
|
@ -129,7 +131,7 @@ def get_article(article, scrape):
|
||||||
# Construct head of article
|
# Construct head of article
|
||||||
image_url = get_article_image(article)
|
image_url = get_article_image(article)
|
||||||
date = datetime.fromtimestamp(mktime(article.published_parsed)).strftime(datetime_format)
|
date = datetime.fromtimestamp(mktime(article.published_parsed)).strftime(datetime_format)
|
||||||
head = '# {}\n\n{}{}{} - [Link]({})'.format(article.title, image_url, get_article_summary(article), date, article.link)
|
head = '# {}\n\n{}{}{}\n\n[Link]({})'.format(article.title, image_url, get_article_summary(article), date, article.link)
|
||||||
|
|
||||||
# Get body of article
|
# Get body of article
|
||||||
if scrape:
|
if scrape:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue