Kindle書籍のPDF化 (半自動化)
過去記事:
あくまで私的利用が目的です. 久々に環境構築したらいくつかハマったのでメモ.
テスト環境
Windows 11 on MacBook Air M1 Parallels
Python 3.12.3
環境構築
Kindle
v1.40.1 (新しいバージョンだとazwとは違う形式になり, 対応できないため)
1.35あたりから, Kindleアプリを何度か再起動すると1.40.1になる.
自分のアカウントでログイン
[ツール] -> [オプション] -> [一般] で「更新がある場合は自動的にインストールする」のチェックを外す
Calibre
6.29.0
7系だと
ebook-convert
が上手くいかなかった
DeDRM v10.0.3
Calibreで[環境設定] -> [プラグイン] -> [ファイルからプラグインを読み込む] -> DeDRM v10.0.3の中のDeDRM_plugin.zipを選択して [開く] -> セキュリティリスクの警告は [はい] -> [calibreを今すぐ再起動]
ImageMagick
ImageMagick 6.8.9-5 Q16 x64 2014-06-26
もっと新しいのでも大丈夫なはず
本質的には不要だが, 表紙画像のアスペクト比を取得 (
identify
) するために使用
Python
Python 3.12.3
本質的には不要だが, 半自動化で利用するために使用
python
で実行できるようにPATHを通しておく
Code
convert.bat
convert.bat
を作成
rem Run the converter from the folder that contains this batch file.
rem /d also switches the drive; the quotes keep folder names with spaces intact.
cd /d "%~dp0"
python convertKindleToPDFInDir.py
rem Keep the console window open so the output can be read before it closes.
pause
exit
convertKindleToPDFInDir.py
convertKindleToPDFInDir.py
を作成
xxxxxxxxxx
import subprocess
import sys
import xml.etree.ElementTree as ET
import os
import glob
import datetime
def command(cmd):
    """Run *cmd* through the shell and yield its standard output line by line.

    This is a generator, so the command is only started once the caller
    begins iterating over the result.

    Args:
        cmd: Complete command line as a single string (executed with
            ``shell=True``).

    Yields:
        Each line of the command's standard output, without line endings.

    A non-zero exit status is not raised to the caller: the failure (and
    whatever the command wrote to standard error) is written to the logs and
    the generator simply yields nothing.
    """
    try:
        # errors="replace" keeps one undecodable byte in a tool's output from
        # aborting the whole batch with UnicodeDecodeError.
        result = subprocess.run(cmd, shell=True, check=True,
                                encoding="utf8", errors="replace",
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as err:
        printLog("ERROR: Command was failed!!")
        printShortLog("ERROR: Command was failed!!")
        printLog('Command [' + cmd + '] was failed.')
        # Surface the tool's own error message so the failure can be diagnosed.
        if err.stderr:
            printLog(err.stderr)
        return
    yield from result.stdout.splitlines()
def getCoverRatio(dirName):
    """Return the page height that matches the cover's aspect ratio.

    Runs ImageMagick's ``identify`` on ``cover.jpg`` inside *dirName* and
    scales the image's height/width ratio to a page width of 200.

    Args:
        dirName: Book directory path, ending with a path separator.

    Returns:
        ``int(200 * height / width)`` of the cover image, or 200 when
        ``identify`` produced no output.
    """
    identifyCmd = 'identify -format "%w,%h" "' + dirName + "cover.jpg" + '"'
    pageHeight = 200
    for line in command(identifyCmd):
        # identify prints "<width>,<height>"; if several lines arrive the
        # last one wins.
        dims = list(map(float, line.split(',')))
        pageHeight = int(200 * (dims[1] / dims[0]))
    return pageHeight
def getBookInfo(dirName):
    """Read the author and title from ``metadata.opf`` inside *dirName*.

    Args:
        dirName: Book directory path, ending with a path separator.

    Returns:
        ``(author, title)`` with characters that are not allowed in file
        names replaced by ``replaceFC``. A value is the empty string when the
        corresponding element is missing or has no text.

    Raises:
        OSError, xml.etree.ElementTree.ParseError: if the metadata file
        cannot be read or parsed (propagated from ``ET.parse``).
    """
    metafileName = dirName + 'metadata.opf'
    root = ET.parse(metafileName).getroot()
    author = ""
    title = ""
    for child in root:
        # Tags carry an XML namespace prefix, hence substring matching.
        if "metadata" not in child.tag:
            continue
        for grandChild in child:
            # An empty element has text None; treat it as an empty string
            # instead of crashing inside replaceFC.
            text = grandChild.text or ""
            if "title" in grandChild.tag:
                title = replaceFC(text)
            if "creator" in grandChild.tag:
                author = replaceFC(text)
    return author, title
# Forbidden characters
def replaceFC(s):
    """Return *s* with every character unsuitable for a file name turned into '-'.

    The replaced characters are ``/ : \\ * ? " ' < > |``.
    """
    forbidden = '/:\\*?"\'<>|'
    # One translate pass instead of a chain of str.replace calls.
    return s.translate(str.maketrans(dict.fromkeys(forbidden, '-')))
def convertToPDF(inputName, outputName, coverH, author, title, margin):
    """Convert one e-book to PDF with Calibre's ``ebook-convert``.

    Args:
        inputName: Path of the source e-book file.
        outputName: Path of the PDF to create.
        coverH: Page height used together with a fixed width of 200 in
            ``--custom-size``.
        author: Unused here (the output name is built by the caller).
        title: Unused here (the output name is built by the caller).
        margin: Page margin as a string, applied to all four sides.

    The full command line and every line of the tool's output are logged.
    Earlier revisions of this command used font sizes 18/14 and either
    ``--pdf-page-numbers`` or a rotated ``--pdf-footer-template``.
    """
    css = ''  # optional extra switch, e.g. ' --extra-css "vertical.css"'
    font = '"游ゴシック"'
    pieces = [
        'ebook-convert',
        '"' + inputName + '"',
        '"' + outputName + '"',
        '--input-profile default',
        '--custom-size 200x' + str(coverH) + css,
        '--unit millimeter',
        '--preserve-cover-aspect-ratio',
        '--pdf-serif-family ' + font,
        '--pdf-sans-family ' + font,
        '--pdf-mono-family ' + font,
        '--pdf-standard-font serif',
        '--pdf-default-font-size 20',
        '--pdf-mono-font-size 16',
        '--pdf-page-margin-bottom ' + margin,
        '--pdf-page-margin-left ' + margin,
        '--pdf-page-margin-right ' + margin,
        '--pdf-page-margin-top ' + margin,
    ]
    cmd = ' '.join(pieces)
    printLog(cmd)
    for outputLine in command(cmd):
        printLog(outputLine)
def decideMargin(inputName):
    """Choose the PDF page margin from the size of the input file.

    Args:
        inputName: Path of the source e-book file.

    Returns:
        ``'-1.0'`` when the file is 20 * 10**6 bytes or larger, otherwise
        ``'40.0'``. The file size and the decision are logged.
    """
    size = os.path.getsize(inputName)
    printLog(size)
    if size < 20*10**6:
        printLog("filesize: normal")
        return '40.0'
    # NOTE(review): presumably large files are image-heavy books that should
    # get no added margin -- confirm how ebook-convert treats a negative value.
    printLog("filesize: large!!")
    return '-1.0'
def replaceSB(s):
    """Escape square brackets in *s* so ``glob`` treats them literally.

    ``[`` becomes ``[[]`` and ``]`` becomes ``[]]``. Both substitutions happen
    in a single pass, so no placeholder text is needed and a name that
    happens to contain such a placeholder can no longer be corrupted.
    """
    return s.translate({ord('['): '[[]', ord(']'): '[]]'})
def getInputFilename(dirName):
    """Return the path of the ``*.azw*`` file inside *dirName*.

    Every match is logged. When several files match, the last one reported
    by ``glob`` is returned; when none match, the empty string is returned.
    """
    # Square brackets in the directory name are escaped so glob does not
    # read them as a character class.
    matches = glob.glob(replaceSB(dirName) + "*.azw*")
    for path in matches:
        printLog(path)
    return matches[-1] if matches else ""
def getDirnames(rootDir):
    """List the directories two levels below *rootDir*.

    Args:
        rootDir: Working directory path, ending with a path separator.

    Returns:
        A list of directory paths using forward slashes, each ending with
        ``/``. The number of directories found is logged.
    """
    dirs = []
    # Escape the root so glob metacharacters in it (for example "[" and "]")
    # are matched literally; only the trailing "*/*" acts as a wildcard.
    for entry in glob.glob(glob.escape(rootDir) + "*/*"):
        dirName = entry.replace('\\', '/') + '/'
        if os.path.isdir(dirName):
            dirs.append(dirName)
    printLog(len(dirs))
    return dirs
def convertToPDFIn(inputDirName, outputDirName, coverExists, metafileExists):
    """Convert the e-book found in *inputDirName* into a PDF in *outputDirName*.

    Args:
        inputDirName: Book directory path, ending with a path separator.
        outputDirName: Destination directory path, ending with a path
            separator.
        coverExists: Whether ``cover.jpg`` is present; if not, a page height
            of 284 is used.
        metafileExists: Whether ``metadata.opf`` is present; if not, the PDF
            is named ``___NoMeta_<current timestamp>.pdf``.
    """
    inputName = getInputFilename(inputDirName)
    coverH = getCoverRatio(inputDirName) if coverExists else 284
    if metafileExists:
        author, title = getBookInfo(inputDirName)
    else:
        author = "___NoMeta"
        title = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    printLog([author, title])
    outputName = outputDirName + author + "_" + title + ".pdf"
    margin = decideMargin(inputName)
    convertToPDF(inputName, outputName, coverH, author, title, margin)
def checkTargetExists(dirName):
    """Report which of the expected files are present in *dirName*.

    Args:
        dirName: Book directory path, ending with a path separator.

    Returns:
        ``(azw3Exists, coverExists, metafileExists)``: whether any ``*.azw*``
        file, ``cover.jpg`` and ``metadata.opf`` exist. The result is logged.
    """
    azw3Exists = bool(glob.glob(replaceSB(dirName) + "*.azw*"))
    metafileExists = os.path.isfile(dirName + 'metadata.opf')
    coverExists = os.path.isfile(dirName + 'cover.jpg')
    if azw3Exists and coverExists and metafileExists:
        printLog("All Exists")
    else:
        printLog(f"Not Found... : azw3={azw3Exists}, cover={coverExists}, meta={metafileExists}")
    return azw3Exists, coverExists, metafileExists
def getShortLogFilename():
    """Return the path of this run's summary log, ``log/<start time>_short.txt``.

    The timestamp comes from the module-level ``initialTime``.
    """
    stamp = initialTime.strftime("%Y-%m-%d-%H-%M-%S")
    return f"log/{stamp}_short.txt"
def getLogFilename():
    """Return the path of this run's detailed log, ``log/<start time>.txt``.

    The timestamp comes from the module-level ``initialTime``.
    """
    stamp = initialTime.strftime("%Y-%m-%d-%H-%M-%S")
    return f"log/{stamp}.txt"
def makeLogFile():
    """Create the ``log`` directory and start both log files for this run.

    The detailed log begins with its own file name. The summary log begins
    with its file name followed by a column header line.
    """
    os.makedirs("log", exist_ok=True)
    # "with" guarantees the handles are closed even if a write fails.
    logName = getLogFilename()
    with open(logName, "a", encoding="utf-8") as f:
        f.write(logName+"\n")
    shortLogName = getShortLogFilename()
    with open(shortLogName, "a", encoding="utf-8") as f:
        f.write(shortLogName+"\n")
        f.write("datetime, azw(3), cover, meta, directory\n")
def printLog(log):
    """Print *log* to the console and append it to the detailed log file.

    Args:
        log: Any value. A list is written as its items joined by single
            spaces; anything else is converted with ``str``.
    """
    if isinstance(log, list):
        s = " ".join(map(str, log))
    else:
        s = str(log)
    print(s)
    # "with" guarantees the handle is closed even if the write fails.
    with open(getLogFilename(), "a", encoding="utf-8") as f:
        f.write(s+"\n")
def printShortLog(log):
    """Append *log* as one line to the summary log file (no console output).

    Args:
        log: Any value. A list is written as its items joined by ``", "``;
            anything else is converted with ``str``.
    """
    if isinstance(log, list):
        s = ", ".join(map(str, log))
    else:
        s = str(log)
    # "with" guarantees the handle is closed even if the write fails.
    with open(getShortLogFilename(), "a", encoding="utf-8") as f:
        f.write(s+"\n")
def main():
    """Convert every book directory under the working folder to PDF.

    Starts the log files, creates the ``_output/`` folder inside ``rootDir``
    and then processes each directory returned by ``getDirnames`` in turn.
    Directories without an ``*.azw*`` file are logged and skipped.
    """
    makeLogFile()
    # Edit this path to point at your own working folder.
    rootDir = "C:/Users/[User Name]/CalibreToConvert/"
    outputDirName = rootDir + "_output/"
    os.makedirs(outputDirName, exist_ok=True)
    inputDirNames = getDirnames(rootDir)
    total = len(inputDirNames)
    for position, bookDir in enumerate(inputDirNames, start=1):
        currentLabel = str(position) + "/" + str(total)
        printLog("# START ("+currentLabel+") ########################################################")
        startedAt = datetime.datetime.now()
        printLog(startedAt)
        printLog(bookDir)
        azw3Exists, coverExists, metafileExists = checkTargetExists(bookDir)
        # One summary row per directory, matching the short log's header.
        printShortLog([startedAt, azw3Exists, coverExists, metafileExists, bookDir])
        if azw3Exists:
            convertToPDFIn(bookDir, outputDirName, coverExists, metafileExists)
        printLog("# END ("+currentLabel+") ##########################################################\n\n\n")
# Start time of this run; the log file name helpers read it. It is bound at
# module level (not only under the __main__ guard) so that importing this
# module and calling the logging helpers does not raise NameError.
initialTime = datetime.datetime.now()

if __name__ == "__main__":
    main()
実行
Kindleアプリで本をDL.
C:\Users\[User Name]\Documents\My Kindle Content
にDLされてる.
Calibreで上記にアクセスし, 当該の本を追加.
C:\Users\[User Name]\calibreライブラリ
にCalibre用としてコピーされる. フォルダ構成は
[著者]/[タイトル]
C:/Users/[User Name]/CalibreToConvert/
などの作業用フォルダを作成し, そこに上記の [著者]フォルダごとコピー
convertKindleToPDFInDir.py
の中のmain関数内のrootDir
を, 上記の作業用フォルダC:/Users/[User Name]/CalibreToConvert/
を指定するように変更.
convert.bat
をダブルクリック.
C:/Users/[User Name]/CalibreToConvert/_output/
フォルダに変換済みのpdfが作成される. 完了.