Skip to content

Commit

Permalink
Create 1 Tika Config and catch InvalidMimeTypeExceptions (frankframew…
Browse files Browse the repository at this point in the history
  • Loading branch information
nielsm5 authored Sep 6, 2023
1 parent 1e05f6d commit 15fb5a2
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.ThreadContext;
import org.springframework.util.InvalidMimeTypeException;
import org.springframework.util.MimeType;

import com.nimbusds.jose.util.JSONObjectUtils;
Expand Down Expand Up @@ -78,7 +79,7 @@
*/
@IbisInitializer
public class ApiListenerServlet extends HttpServletBase {
protected Logger log = LogUtil.getLogger(this);
private static final Logger LOG = LogUtil.getLogger(ApiListenerServlet.class);
private static final long serialVersionUID = 1L;

public static final String AUTHENTICATION_COOKIE_NAME = "authenticationToken";
Expand Down Expand Up @@ -142,16 +143,16 @@ protected void service(HttpServletRequest request, HttpServletResponse response)
method = EnumUtils.parse(HttpMethod.class, request.getMethod());
} catch (IllegalArgumentException e) {
response.setStatus(405);
log.warn("{} method [{}] not allowed", () -> createAbortMessage(remoteUser, 405), request::getMethod);
LOG.warn("{} method [{}] not allowed", () -> createAbortMessage(remoteUser, 405), request::getMethod);
return;
}

String uri = request.getPathInfo();
log.info("ApiListenerServlet dispatching uri [{}] and method [{}]{}", uri, method, (StringUtils.isNotEmpty(remoteUser) ? " issued by ["+remoteUser+"]" : ""));
LOG.info("ApiListenerServlet dispatching uri [{}] and method [{}]{}", uri, method, (StringUtils.isNotEmpty(remoteUser) ? " issued by ["+remoteUser+"]" : ""));

if (uri==null) {
response.setStatus(400);
log.warn("{} empty uri", () -> createAbortMessage(remoteUser, 400));
LOG.warn("{} empty uri", () -> createAbortMessage(remoteUser, 400));
return;
}
if(uri.endsWith("/")) {
Expand Down Expand Up @@ -221,7 +222,7 @@ private void handleRequest(HttpServletRequest request, HttpServletResponse respo
ApiDispatchConfig config = dispatcher.findConfigForUri(uri);
if(config == null) {
response.setStatus(404);
log.warn("{} no ApiListener configured for [{}]", ()-> createAbortMessage(remoteUser, 404), ()-> uri);
LOG.warn("{} no ApiListener configured for [{}]", ()-> createAbortMessage(remoteUser, 404), ()-> uri);
return;
}

Expand All @@ -247,7 +248,7 @@ private void handleRequest(HttpServletRequest request, HttpServletResponse respo
//Only cut off OPTIONS (aka preflight) requests
if(method == HttpMethod.OPTIONS) {
response.setStatus(200);
if(log.isTraceEnabled()) log.trace("Aborting preflight request with status [200], method [{}]", method);
if(LOG.isTraceEnabled()) LOG.trace("Aborting preflight request with status [200], method [{}]", method);
return;
}
}
Expand All @@ -258,11 +259,11 @@ private void handleRequest(HttpServletRequest request, HttpServletResponse respo
ApiListener listener = config.getApiListener(method);
if(listener == null) {
response.setStatus(405);
log.warn("{} method [{}] not allowed", ()-> createAbortMessage(remoteUser, 405), ()-> method);
LOG.warn("{} method [{}] not allowed", ()-> createAbortMessage(remoteUser, 405), ()-> method);
return;
}

if(log.isTraceEnabled()) log.trace("ApiListenerServlet calling service [{}]", listener.getName());
if(LOG.isTraceEnabled()) LOG.trace("ApiListenerServlet calling service [{}]", listener.getName());

/*
* Check authentication
Expand Down Expand Up @@ -303,7 +304,7 @@ private void handleRequest(HttpServletRequest request, HttpServletResponse respo
messageContext.setSecurityHandler(new JwtSecurityHandler(claimsSet, listener.getRoleClaim(), listener.getPrincipalNameClaim()));
messageContext.put("ClaimsSet", JSONObjectUtils.toJSONString(claimsSet));
} catch(Exception e) {
log.warn("unable to validate jwt",e);
LOG.warn("unable to validate jwt",e);
response.sendError(401, e.getMessage());
return;
}
Expand Down Expand Up @@ -351,7 +352,7 @@ private void handleRequest(HttpServletRequest request, HttpServletResponse respo
}

response.setStatus(401);
log.warn("{} no (valid) credentials supplied", ()->createAbortMessage(remoteUser, 401));
LOG.warn("{} no (valid) credentials supplied", ()->createAbortMessage(remoteUser, 401));
return;
}

Expand All @@ -377,36 +378,36 @@ private void handleRequest(HttpServletRequest request, HttpServletResponse respo
*/
final String acceptHeader = request.getHeader("Accept");
if(!listener.accepts(acceptHeader)) { // If an Accept header is present, make sure we comply to it!
log.warn("{} client expects Accept [{}] but listener can only provide [{}]", ()->createAbortMessage(request.getRemoteUser(), 406), ()-> acceptHeader, listener::getContentType);
LOG.warn("{} client expects Accept [{}] but listener can only provide [{}]", ()->createAbortMessage(request.getRemoteUser(), 406), ()-> acceptHeader, listener::getContentType);
response.sendError(406, "endpoint cannot provide the supplied MimeType");
return;
}

if(!listener.isConsumable(request.getContentType())) {
response.setStatus(415);
log.warn("{} did not match consumes [{}] got [{}] instead", ()-> createAbortMessage(remoteUser, 415), listener::getConsumes, request::getContentType);
LOG.warn("{} did not match consumes [{}] got [{}] instead", ()-> createAbortMessage(remoteUser, 415), listener::getConsumes, request::getContentType);
return;
}

String etagCacheKey = ApiCacheManager.buildCacheKey(uri);
log.debug("Evaluating preconditions for listener[{}] etagKey[{}]", listener.getName(), etagCacheKey);
LOG.debug("Evaluating preconditions for listener[{}] etagKey[{}]", listener.getName(), etagCacheKey);
if(cache.containsKey(etagCacheKey)) {
String cachedEtag = (String) cache.get(etagCacheKey);
log.debug("found etag value[{}] for key[{}]", cachedEtag, etagCacheKey);
LOG.debug("found etag value[{}] for key[{}]", cachedEtag, etagCacheKey);

if(method == HttpMethod.GET) {
String ifNoneMatch = request.getHeader("If-None-Match");
if(ifNoneMatch != null && ifNoneMatch.equals(cachedEtag)) {
response.setStatus(304);
if (log.isDebugEnabled()) log.debug("{} matched if-none-match [{}]", ()->createAbortMessage(remoteUser, 304), ()->ifNoneMatch);
if (LOG.isDebugEnabled()) LOG.debug("{} matched if-none-match [{}]", ()->createAbortMessage(remoteUser, 304), ()->ifNoneMatch);
return;
}
}
else {
String ifMatch = request.getHeader("If-Match");
if(ifMatch != null && !ifMatch.equals(cachedEtag)) {
response.setStatus(412);
log.warn("{} matched if-match [{}] method [{}]", ()->createAbortMessage(remoteUser, 412), ()->ifMatch, ()->method);
LOG.warn("{} matched if-match [{}] method [{}]", ()->createAbortMessage(remoteUser, 412), ()->ifMatch, ()->method);
return;
}
}
Expand Down Expand Up @@ -440,7 +441,7 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {

if(name != null) {
uriIdentifier++;
if(log.isTraceEnabled()) log.trace("setting uriSegment [{}] to [{}]", name, uriSegments[i]);
if(LOG.isTraceEnabled()) LOG.trace("setting uriSegment [{}] to [{}]", name, uriSegments[i]);
messageContext.put(name, uriSegments[i]);
}
}
Expand Down Expand Up @@ -482,17 +483,17 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {
PartMessage message = new PartMessage(bodyPart);
if (!MultipartUtils.isBinary(bodyPart)) {
// Process regular form field (input type="text|radio|checkbox|etc", select, etc).
log.trace("setting multipart formField [{}] to [{}]", fieldName, message);
LOG.trace("setting multipart formField [{}] to [{}]", fieldName, message);
messageContext.put(fieldName, message.asString());
attachment.addAttribute("type", "text");
attachment.addAttribute("value", message.asString());
} else {
// Process form file field (input type="file").
final String fieldNameName = fieldName + "Name";
final String fileName = MultipartUtils.getFileName(bodyPart);
log.trace("setting multipart formFile [{}] to [{}]", fieldNameName, fileName);
LOG.trace("setting multipart formFile [{}] to [{}]", fieldNameName, fileName);
messageContext.put(fieldNameName, fileName);
log.trace("setting parameter [{}] to input stream of file [{}]", fieldName, fileName);
LOG.trace("setting parameter [{}] to input stream of file [{}]", fieldName, fileName);
messageContext.put(fieldName, message);

attachment.addAttribute("type", "file");
Expand All @@ -506,7 +507,7 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {
messageContext.put("multipartAttachments", attachments.toXML());
} catch(MessagingException e) {
response.sendError(400, "Could not read mime multipart response");
log.warn("{} Could not read mime multipart response", () -> createAbortMessage(remoteUser, 400));
LOG.warn("{} Could not read mime multipart response", () -> createAbortMessage(remoteUser, 400));
return;
}
} else {
Expand All @@ -531,26 +532,26 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {
* Calculate an eTag over the processed result and store in cache
*/
if (Boolean.TRUE.equals(messageContext.getBoolean(UPDATE_ETAG_CONTEXT_KEY))) {
log.debug("calculating etags over processed result");
LOG.debug("calculating etags over processed result");
String cleanPattern = listener.getCleanPattern();
if(!Message.isEmpty(result) && method == HttpMethod.GET && cleanPattern != null) { //If the data has changed, generate a new eTag
String eTag = MessageUtils.generateMD5Hash(result);
if(eTag != null) {
log.debug("adding/overwriting etag with key[{}] value[{}]", etagCacheKey, eTag);
LOG.debug("adding/overwriting etag with key[{}] value[{}]", etagCacheKey, eTag);
cache.put(etagCacheKey, eTag);
response.addHeader("etag", eTag);
} else {
log.debug("skipping etag with key[{}] computed value is null", etagCacheKey);
LOG.debug("skipping etag with key[{}] computed value is null", etagCacheKey);
}
}
else {
log.debug("removing etag with key[{}]", etagCacheKey);
LOG.debug("removing etag with key[{}]", etagCacheKey);
cache.remove(etagCacheKey);

// Not only remove the eTag for the selected resources but also the collection
String key = ApiCacheManager.getParentCacheKey(listener, uri);
if(key != null) {
log.debug("removing parent etag with key[{}]", key);
LOG.debug("removing parent etag with key[{}]", key);
cache.remove(key);
}
}
Expand All @@ -575,7 +576,7 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {
if(!response.containsHeader("etag")) {
cacheControl.append("no-store, no-cache, ");
response.setHeader("Pragma", "no-cache");
log.trace("disabling cache for uri [{}]", request::getRequestURI);
LOG.trace("disabling cache for uri [{}]", request::getRequestURI);
}
cacheControl.append("must-revalidate, max-age=0, post-check=0, pre-check=0");
response.setHeader("Cache-Control", cacheControl.toString());
Expand All @@ -593,7 +594,7 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {
if(StringUtils.isNotEmpty(listener.getContentDispositionHeaderSessionKey())) {
String contentDisposition = messageContext.getString(listener.getContentDispositionHeaderSessionKey());
if(StringUtils.isNotEmpty(contentDisposition)) {
log.debug("Setting Content-Disposition header to [{}]", contentDisposition);
LOG.debug("Setting Content-Disposition header to [{}]", contentDisposition);
response.setHeader("Content-Disposition", contentDisposition);
}
}
Expand All @@ -611,21 +612,21 @@ else if(segment.startsWith("{") && segment.endsWith("}")) {
*/
final boolean outputWritten = writeToResponseStream(response, result);
if (!outputWritten) {
log.debug("No output written, set content-type header to null");
LOG.debug("No output written, set content-type header to null");
response.resetBuffer();
response.setContentType(null);
}

log.trace("ApiListenerServlet finished with statusCode [{}] result [{}]", statusCode, result);
LOG.trace("ApiListenerServlet finished with statusCode [{}] result [{}]", statusCode, result);
}
catch (Exception e) {
log.warn("ApiListenerServlet caught exception, will rethrow as ServletException", e);
LOG.warn("ApiListenerServlet caught exception, will rethrow as ServletException", e);
try {
response.reset();
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e.getMessage());
}
catch (IOException | IllegalStateException ex) {
log.warn("an error occurred while trying to handle exception [{}]", e.getMessage(), ex);
LOG.warn("an error occurred while trying to handle exception [{}]", e.getMessage(), ex);
//We're only informing the end user(s), no need to catch this error...
response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
}
Expand All @@ -642,12 +643,12 @@ private Map<String, Object> extractRequestParams(HttpServletRequest request) {
String[] paramList = request.getParameterValues(paramName);
if(paramList.length > 1) { // contains multiple items
List<String> valueList = Arrays.asList(paramList);
if(log.isTraceEnabled()) log.trace("setting queryParameter [{}] to {}", paramName, valueList);
if(LOG.isTraceEnabled()) LOG.trace("setting queryParameter [{}] to {}", paramName, valueList);
params.put(paramName, valueList);
}
else {
String paramValue = request.getParameter(paramName);
if(log.isTraceEnabled()) log.trace("setting queryParameter [{}] to [{}]", paramName, paramValue);
if(LOG.isTraceEnabled()) LOG.trace("setting queryParameter [{}] to [{}]", paramName, paramValue);
params.put(paramName, paramValue);
}
}
Expand All @@ -669,23 +670,26 @@ private String extractHeaderParamsAsXml(HttpServletRequest request, ApiListener
headersXml.addSubElement(headerXml);
}
catch (Exception e) {
log.info("unable to convert header to xml name[{}] value[{}], exception message: {}", headerParam, headerValue, e.getMessage());
LOG.info("unable to convert header to xml name[{}] value[{}], exception message: {}", headerParam, headerValue, e.getMessage());
}
}
return headersXml.toXML();
}

private static MimeType determineContentType(PipeLineSession messageContext, ApiListener listener, Message result) throws IOException {
private static @Nonnull MimeType determineContentType(PipeLineSession messageContext, ApiListener listener, Message result) throws IOException {
if(listener.getProduces() == MediaTypes.ANY) {
Message parsedContentType = messageContext.getMessage("contentType");
if(!Message.isEmpty(parsedContentType)) {
return MimeType.valueOf(parsedContentType.asString());
} else {
MimeType providedContentType = MessageUtils.getMimeType(result); // MimeType might be known
if(providedContentType != null) {
return providedContentType;
try {
return MimeType.valueOf(parsedContentType.asString());
} catch (InvalidMimeTypeException imte) {
LOG.warn("unable to parse mimetype from SessionKey [contentType] value [{}]", parsedContentType, imte);
}
}
MimeType providedContentType = MessageUtils.getMimeType(result); // MimeType might be known
if(providedContentType != null) {
return providedContentType;
}
} else if(listener.getProduces() == MediaTypes.DETECT) {
MimeType computedContentType = MessageUtils.computeMimeType(result); // Calculate MimeType
if(computedContentType != null) {
Expand Down
20 changes: 15 additions & 5 deletions core/src/main/java/nl/nn/adapterframework/util/MessageUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.springframework.util.DigestUtils;
Expand All @@ -47,6 +48,17 @@
public abstract class MessageUtils {
private static final Logger LOG = LogUtil.getLogger(MessageUtils.class);
private static final int CHARSET_CONFIDENCE_LEVEL = AppConstants.getInstance().getInt("charset.confidenceLevel", 65);
private static final TikaConfig TIKA_CONFIG = createTikaConfig();
private static final int TIKA_MAGIC_LENGHT = 64 * 1024; // This needs to be reasonably large to be able to correctly detect things like XML root elements after initial comment and DTDs

/**
 * Creates the {@link TikaConfig} instance using Tika's default constructor.
 * <p>
 * A failure to construct the configuration is not propagated: it is logged at
 * error level and {@code null} is returned instead.
 *
 * @return a new {@link TikaConfig}, or {@code null} when a {@link TikaException}
 *         or {@link IOException} occurred while creating it
 */
private static TikaConfig createTikaConfig() {
	TikaConfig config = null;
	try {
		config = new TikaConfig();
	} catch (TikaException | IOException e) {
		// Deliberately swallowed after logging; the caller receives null in this case.
		LOG.error("unable to create Tika config, cannot determine mimetypes!", e);
	}
	return config;
}

/**
* Fetch metadata from the {@link HttpServletRequest} such as Content-Length, Content-Type (mimetype + charset)
Expand Down Expand Up @@ -203,7 +215,7 @@ public static MimeType computeMimeType(Message message) {
/**
* Computes the {@link MimeType} when not available, attempts to resolve the Charset when of type TEXT.
* <p>
* NOTE: This is a resource intensive operation, the first 64k is being read and stored in memory.
* NOTE: This is a resource intensive operation, the first {@value #TIKA_MAGIC_LENGHT} bytes are being read and stored in memory.
*/
public static MimeType computeMimeType(Message message, String filename) {
if(Message.isEmpty(message)) {
Expand All @@ -224,15 +236,13 @@ public static MimeType computeMimeType(Message message, String filename) {
}

try {
TikaConfig tika = new TikaConfig();
Metadata metadata = new Metadata();
metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, name);
int tikaMimeMagicLength = tika.getMimeRepository().getMinLength();
byte[] magic = message.getMagic(tikaMimeMagicLength);
byte[] magic = message.getMagic(TIKA_MAGIC_LENGHT);
if(magic.length == 0) {
return null;
}
org.apache.tika.mime.MediaType tikaMediaType = tika.getDetector().detect(new ByteArrayInputStream(magic), metadata);
org.apache.tika.mime.MediaType tikaMediaType = TIKA_CONFIG.getDetector().detect(new ByteArrayInputStream(magic), metadata);
MimeType mimeType = MimeType.valueOf(tikaMediaType.toString());
context.withMimeType(mimeType);
if("text".equals(mimeType.getType()) || message.getCharset() != null) { // is of type 'text' or message has charset
Expand Down

0 comments on commit 15fb5a2

Please sign in to comment.